Example #1
def test_log(self):
    # Note: RLCard's Logger treats its argument as a log *directory*
    # (despite the ".txt" suffix here), which is why shutil.rmtree() is
    # used to clear output left over from a previous run.
    log_dir = "./newtest/test_log.txt"
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    logger = Logger(log_dir)
    logger.log("test text")
    logger.log_performance(1, 1)
    logger.log_performance(2, 2)
    logger.log_performance(3, 3)
    # Flush the log files before plotting the recorded performance curve
    logger.close_files()
    logger.plot('aaa')
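
The test above assumes its enclosing test class and imports. A minimal self-contained sketch of the same round-trip, assuming the Logger is importable as rlcard.utils.logger.Logger and takes a log directory (both are assumptions about the library version):

import os
import shutil

from rlcard.utils.logger import Logger  # assumed import path

log_dir = './newtest/logger_demo'  # hypothetical scratch directory
if os.path.exists(log_dir):
    shutil.rmtree(log_dir)  # clear output from a previous run

logger = Logger(log_dir)
logger.log('test text')           # free-form text entry -> log.txt
logger.log_performance(1, 1)      # (timestep, reward) rows -> performance.csv
logger.log_performance(2, 2)
logger.log_performance(3, 3)
logger.close_files()              # flush both files
logger.plot('demo')               # plot the learning curve from the csv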
Example #2
# Training agents: create four "mocsar_min" rule agents, then put the
# learning agent into seat 0 alongside three of them
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
    env.game.num_players, env.game.num_cards, episode_num))

# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')
for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
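        # -- The original snippet is truncated inside this branch. A plausible
        # body, following the standard RLCard evaluation pattern (run eval_env
        # without training and log the average payoff); this completion and
        # the evaluate_num variable are assumptions, not part of the source:
        reward = 0
        for _ in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)
            reward += payoffs[0]
        logger.log_performance(env.timestep, reward / evaluate_num)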
Example #3
                # Collect trajectories produced by the worker processes
                trajectories = OUTPUT_QUEUE.get()

                # Feed transitions into agent memory, and train
                for ts in trajectories[0]:
                    agent.feed(ts)
            # Evaluate the performance: split the evaluation episodes across
            # the worker processes
            reward = 0
            tasks = assign_task(evaluate_num, PROCESS_NUM)
            # Snapshot the current DQN weights so the workers evaluate with
            # the up-to-date policy (TF1-era tf.contrib.slim API)
            variables = tf.contrib.slim.get_variables(scope="dqn", collection=tf.GraphKeys.TRAINABLE_VARIABLES)
            variables = [var.eval() for var in variables]
            for task in tasks:
                INPUT_QUEUE.put((task, False, variables, agent.total_t))
            for _ in range(evaluate_num):
                payoffs = OUTPUT_QUEUE.get()
                reward += payoffs[0]
            logger.log('\n########## Evaluation ##########')
            logger.log('Average reward is {}'.format(float(reward)/evaluate_num))

        # Close files in the logger
        logger.close_files()

        # Plot the learning curve
        logger.plot('DQN')
        
        # Save model
        save_dir = 'models/blackjack_dqn'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(save_dir, 'model'))
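
This example assumes a helper assign_task(evaluate_num, PROCESS_NUM) that splits the evaluation episodes across the worker processes. A minimal sketch of such a helper (the name comes from the snippet; this implementation is an assumption):

def assign_task(task_num, process_num):
    # Split task_num episodes as evenly as possible across process_num
    # workers, e.g. assign_task(10, 4) -> [3, 3, 2, 2]
    base, extra = divmod(task_num, process_num)
    return [base + 1 if i < extra else base for i in range(process_num)]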
Example #4
env.model.create_agents({"mocsar_min": 4})
env_agent_list = [env.model.rule_agents[i] for i in range(1, 4)]
env_agent_list.insert(0, agent)
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information, NFSP, RuleAgents, Pytorch ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(env.game.num_players,
                                                                 env.game.num_cards,
                                                                 conf.get_int('episode_num')))

for episode in range(conf.get_int('episode_num')):

    # First sample a policy for the episode (NFSP plays either its
    # best-response policy or its average policy, drawn per episode)
    agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
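
The snippet ends inside the training loop. After training, the usual pattern (see Example #1 and Example #3) is to close the logger's files and plot the curve; a hedged sketch of that tail:

# After the training loop: flush log.txt / performance.csv, then plot
logger.close_files()
logger.plot('NFSP')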