Example #1
random_agent = RandomAgent(action_num=eval_env.action_num)

# Other agents
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
    env.game.num_players, env.game.num_cards, episode_num))

# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')
for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
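
In the rlcard examples this training loop is normally closed out with a
periodic evaluation and a Logger teardown (compare the commented-out
log_performance call in Example #4). A minimal sketch, assuming rlcard's
tournament utility and the variables evaluate_every and evaluate_num are
in scope:

    # Inside the episode loop: evaluate periodically
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])

# After the loop: close log files and plot the learning curve
logger.close_files()
logger.plot('agent')  # the label just names the curve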
Example #2
    with tf.Session() as sess:

        # Set agents
        global_step = tf.Variable(0, name='global_step', trainable=False)
        agent = DQNAgent(sess,
                         scope='dqn',
                         action_num=env.action_num,
                         replay_memory_init_size=memory_init_size,
                         state_shape=env.state_shape,
                         mlp_layers=[10, 10])
        env.set_agents([agent])
        eval_env.set_agents([agent])
        sess.run(tf.global_variables_initializer())

        # Init a Logger to plot the learning curve
        logger = Logger(xlabel='timestep', ylabel='reward',
                        legend='DQN on Blackjack', log_path=log_path, csv_path=csv_path)

        for episode in range(episode_num // evaluate_every):

            # Generate data from the environment
            tasks = assign_task(evaluate_every, PROCESS_NUM)
            for task in tasks:
                INPUT_QUEUE.put((task, True, None, None))
            for _ in range(evaluate_every):
                trajectories = OUTPUT_QUEUE.get()

                # Feed transitions into agent memory, and train
                for ts in trajectories[0]:
                    agent.feed(ts)
            # Evaluate the performance
            reward = 0
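
Example #2 leans on assign_task, INPUT_QUEUE, OUTPUT_QUEUE and PROCESS_NUM,
which are defined outside the excerpt. A rough sketch of what that
multiprocessing plumbing could look like; the helper bodies below are
assumptions, not the original code:

import multiprocessing as mp

import rlcard

PROCESS_NUM = 4
INPUT_QUEUE = mp.Queue()
OUTPUT_QUEUE = mp.Queue()

def assign_task(num_episodes, process_num):
    # Split num_episodes into one chunk per worker process.
    chunk, rest = divmod(num_episodes, process_num)
    tasks = [chunk] * process_num
    tasks[-1] += rest
    return tasks

def worker(input_queue, output_queue):
    # Each worker owns its own environment and pushes one trajectory list
    # per episode back to the trainer; agent wiring is omitted here.
    env = rlcard.make('blackjack')
    while True:
        num_episodes, is_training, _, _ = input_queue.get()
        for _ in range(num_episodes):
            trajectories, _ = env.run(is_training=is_training)
            output_queue.put(trajectories)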
Example #3
    agents = []
    for i in range(env.player_num):
        # One NFSP agent per player; the opening of this constructor call
        # is truncated in the source, and the layer sizes are assumptions.
        agent = NFSPAgent(sess,
                          scope='nfsp' + str(i),
                          action_num=env.action_num,
                          state_shape=env.state_shape,
                          hidden_layers_sizes=[512, 512],
                          min_buffer_size_to_learn=memory_init_size,
                          q_replay_memory_init_size=memory_init_size,
                          train_every=train_every,
                          q_train_every=train_every,
                          q_mlp_layers=[512, 512])
        agents.append(agent)
    random_agent = RandomAgent(action_num=eval_env.action_num)

    env.set_agents(agents)
    eval_env.set_agents([agents[0], random_agent])

    # Initialize global variables
    sess.run(tf.global_variables_initializer())

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):

        # First sample a policy for the episode
        for agent in agents:
            agent.sample_episode_policy()

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for i in range(env.player_num):
            for ts in trajectories[i]:
                agents[i].feed(ts)
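
sample_episode_policy is what distinguishes NFSP from plain DQN here: at the
start of every episode each agent decides, with the anticipatory parameter,
whether to act from its best-response (RL) network or from its supervised
average-policy network. Conceptually, a sketch (not the library internals):

import random

class EpisodePolicyChooser:
    # Illustrative only; rlcard's NFSPAgent does this internally.
    def __init__(self, anticipatory_param=0.1):
        self.anticipatory_param = anticipatory_param
        self.policy = 'average_policy'

    def sample_episode_policy(self):
        # With probability anticipatory_param, play the best response;
        # otherwise play the average policy.
        if random.random() < self.anticipatory_param:
            self.policy = 'best_response'
        else:
            self.policy = 'average_policy'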
Example #4
set_global_seed(0)


# Initilize CFR Agent
opponent = CFRAgent(env) 
#opponent = RandomAgent(action_num=env.action_num)
#opponent.load()  # If we have saved model, we first load the model

#agent = RandomAgent(action_num=env.action_num)
agent = BRAgent(eval_env, opponent)
#agent = CFRAgent(env) 

# Evaluate CFR against pre-trained NFSP

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    opponent.train()
    #agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        exploitability(eval_env, opponent)
        #logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()
logger.plot('BR')
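
The commented-out opponent.load() above hints at checkpointing: rlcard's
CFRAgent takes a model_path and exposes save() and load(). A small sketch
of that round trip (the path is an assumption):

import os

opponent = CFRAgent(env, model_path='./cfr_model')
if os.path.exists('./cfr_model'):
    opponent.load()    # resume from a previous run

for episode in range(episode_num):
    opponent.train()

opponent.save()        # persist the policy and average-policy tables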

Example #5
    # Initialize a global step
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Set up the agents
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     train_every=train_every,
                     state_shape=env.state_shape,
                     mlp_layers=[128, 128])
    # Initialize global variables
    sess.run(tf.global_variables_initializer())

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    state = env.reset()

    for timestep in range(timesteps):
        action = agent.step(state)
        next_state, reward, done = env.step(action)
        ts = (state, action, reward, next_state, done)
        agent.feed(ts)
        state = next_state

        if timestep % evaluate_every == 0:
            rewards = []
            state = eval_env.reset()
            for _ in range(evaluate_num):
                action, _ = agent.eval_step(state)
                _, reward, done = eval_env.step(action)
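
The evaluation block is cut off at this point. A sketch of how such a
single-agent evaluation loop is typically finished, assuming the
environments were created with single_agent_mode so that step() returns
(next_state, reward, done):

        if timestep % evaluate_every == 0:
            rewards = []
            for _ in range(evaluate_num):
                state = eval_env.reset()
                done = False
                while not done:
                    action, _ = agent.eval_step(state)
                    state, reward, done = eval_env.step(action)
                rewards.append(reward)
            logger.log_performance(timestep, sum(rewards) / evaluate_num)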
Example #6
    def test_log(self):
        log_dir = "./newtest/test_log.txt"
        if os.path.exists(log_dir):
            shutil.rmtree(log_dir)
        logger = Logger(log_dir)
        logger.log("test text")
        logger.log_performance(1, 1)
        logger.log_performance(2, 2)
        logger.log_performance(3, 3)
        logger.close_files()
        logger.plot('aaa')
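
test_log above is a method of a unittest.TestCase; a minimal harness around
it, where the class name and import path are assumptions based on rlcard's
test layout:

import os
import shutil
import unittest

from rlcard.utils import Logger

class TestLogger(unittest.TestCase):

    def test_log(self):
        ...  # body as in Example #6 above

if __name__ == '__main__':
    unittest.main()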
Example #7
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     train_every=train_every,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])

    random_agent = RandomAgent(action_num=eval_env.action_num)

    sess.run(tf.compat.v1.global_variables_initializer())

    env.set_agents([agent, random_agent, random_agent, random_agent])
    eval_env.set_agents([agent, random_agent, random_agent, random_agent])

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    # Log Game info
    logger.log('\n########## Game information ##########')
    logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
        env.game.num_players, env.game.num_cards, episode_num))
    env.game.round.set_print_mode(print_mode=True)

    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent

        for ts in trajectories[0]:
            agent.feed(ts)
Example #8
# NFSP agent (PyTorch). The opening of this constructor call is truncated
# in the source; all argument values except device are assumptions.
agent = NFSPAgent(scope='nfsp',
                  action_num=env.action_num,
                  state_shape=env.state_shape,
                  hidden_layers_sizes=[512, 512],
                  q_mlp_layers=[512, 512],
                  device=torch.device('cuda'))

# Other agents
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information, NFSP, RuleAgents, Pytorch ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(env.game.num_players,
                                                                 env.game.num_cards,
                                                                 conf.get_int('episode_num')))

for episode in range(conf.get_int('episode_num')):

    # First sample a policy for the episode
    agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)