Example no. 1
 def test_log(self):
     logger = Logger(xlabel="x",
                     ylabel="y",
                     legend="test",
                     log_path="./newtest/test_log.txt")
     logger.log("test text")
     with open("./newtest/test_log.txt", "r") as f:
         contents = f.read()
     self.assertEqual(contents, "test text\n")
     logger.close_file()
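For context, a minimal sketch of the same keyword-argument Logger used outside a unit test. It is assembled only from calls that appear in these examples (log, add_point, make_plot, close_file); the paths, labels, and placeholder reward values are illustrative assumptions, and the output directory is assumed to already exist.

logger = Logger(xlabel='episode',
                ylabel='reward',
                legend='example run',
                log_path='./newtest/example_log.txt')
for episode in range(10):
    reward = float(episode)  # placeholder metric, not a real evaluation
    logger.log('Episode {} average reward {}'.format(episode, reward))
    logger.add_point(x=episode, y=reward)
logger.make_plot(save_path='./newtest/example.png')
logger.close_file()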
Example no. 2
 def test_log(self):
     # Despite the ".txt" suffix, this path is used as the Logger's output directory
     log_dir = "./newtest/test_log.txt"
     if os.path.exists(log_dir):
         shutil.rmtree(log_dir)
     logger = Logger(log_dir)
     logger.log("test text")
     logger.log_performance(1, 1)
     logger.log_performance(2, 2)
     logger.log_performance(3, 3)
     logger.close_files()
     logger.plot('aaa')
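For comparison, a minimal sketch of the directory-based Logger from this test driven by a plain loop instead of hard-coded values. It only strings together the calls shown above (log, log_performance, close_files, plot); the directory name, the loop, and the placeholder rewards are illustrative assumptions.

logger = Logger('./newtest/example_run/')
logger.log('starting example run')
for timestep in range(1, 4):
    reward = float(timestep)  # placeholder metric, not a real evaluation
    logger.log_performance(timestep, reward)
logger.close_files()
logger.plot('example')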
Example no. 3
 def test_log(self):
     log_path = "./newtest/test_log.txt"
     log_dir = os.path.dirname(log_path)
     if os.path.exists(log_dir):
         shutil.rmtree(log_dir)
     logger = Logger(xlabel="x", ylabel="y", legend="test", log_path=log_path)
     logger.log("test text")
     with open("./newtest/test_log.txt", "r") as f:
         contents = f.read()
     self.assertEqual(contents, "test text\n")
     logger.close_file()
     shutil.rmtree(log_dir)
Example no. 4
                                (env.player_num - 1))
            for eval_episode in range(evaluate_num):
                print(
                    '\rEPISODE {} - Eval Random {} over {} - Number of game played {} - {}'
                    .format(episode, eval_episode, evaluate_num,
                            total_game_played,
                            time_difference_good_format(seconds, time.time())),
                    end='')
                _, payoffs = eval_env.run(is_training=False)
                total_game_played += 1
                reward_random_list.append(payoffs[0])
                reward_random += payoffs[0]
                taking_list.append(eval_env.game.players[0].taking)

            logger_random.log(
                '\n########## Evaluation Against Random - Episode {} ##########'
                .format(episode))
            logger_random.log(
                'Timestep: {} Average reward against random is {}'.format(
                    env.timestep,
                    float(reward_random) / evaluate_num))

            # Add point to logger
            logger_random.add_point(x=env.timestep,
                                    y=float(reward_random) / evaluate_num)

            # Make plot
            logger_random.make_plot(save_path=figure_path_random +
                                    str(episode) + '.png')
            logger_random.make_plot_hist(
                save_path_1=figure_path_random + str(episode) + '_hist.png',
Example no. 5
class ExperimentRunner:
    def __init__(self, env, eval_env, log_every, save_every, base_dir, config,
                 training_agent, vs_agent, feed_function, save_function):
        self.save_dir = "{}/{}".format(base_dir,
                                       datetime.now().strftime("%Y%m%d"))
        self.log_dir = os.path.join(self.save_dir, "logs/")
        self.model_dir = os.path.join(self.save_dir, "model/")
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        self.log_every = log_every
        self.save_every = save_every

        self.config = config
        self.env = env
        self.eval_env = eval_env
        self.agent = training_agent
        self.training_agents = [self.agent, vs_agent]
        self.env.set_agents(self.training_agents)

        self.logger = Logger(self.log_dir)
        self.logger.log("CONFIG: ")
        self.logger.log(str(config))
        self.stat_logger = YanivStatLogger(self.logger)

        self.feed_function = feed_function
        self.save_function = save_function

        self.action_space = utils.JOINED_ACTION_SPACE if config[
            'single_step_actions'] else utils.ACTION_SPACE

    def feed_game(self, agent, trajectories, player_id):
        self.feed_function(agent, trajectories[player_id])

        if self.config.get("feed_both_games"):
            # Use the other player's trajectory; (player_id + 1) must be taken
            # modulo the number of agents, otherwise "1 % len(...)" binds first
            # and the last player's index runs out of range.
            other_id = (player_id + 1) % len(self.training_agents)
            other_traj = trajectories[other_id]
            if self.training_agents[other_id].use_raw:
                self.feed_function(
                    agent,
                    list(
                        map(
                            lambda t: [t[0], self.action_space[t[1]], *t[2:]],
                            other_traj,
                        )))
            else:
                self.feed_function(agent, other_traj)

    def run_training(self, episode_num, eval_every, eval_vs, eval_num):
        for episode in trange(episode_num, desc="Episodes", file=sys.stdout):
            # Generate data from the environment
            trajectories, _ = self.env.run(is_training=True)
            self.stat_logger.add_game(trajectories, self.env, 0)

            self.feed_game(self.agent, trajectories, 0)
            if self.config['feed_both_agents']:
                self.feed_game(self.training_agents[1], trajectories, 1)

            if episode != 0 and episode % self.log_every == 0:
                self.stat_logger.log_stats()

            if episode != 0 and episode % self.save_every == 0:
                self.save_function(self.agent, self.model_dir)

            if episode != 0 and episode % eval_every == 0:
                self.logger.log(
                    "\n\n########## Evaluation {} ##########".format(episode))
                self.evaluate_perf(eval_vs, eval_num)

        self.evaluate_perf(eval_vs, eval_num)
        self.save_function(self.agent, self.model_dir)

    def evaluate_perf(self, eval_vs, eval_num):
        if isinstance(eval_vs, list):
            for vs in eval_vs:
                self.run_evaluation(vs, eval_num)
        else:
            self.run_evaluation(eval_vs, eval_num)

    def run_evaluation(self, vs, num):
        self.eval_env.set_agents([self.agent, vs])
        self.logger.log("eval vs {}".format(vs.__class__.__name__))
        r = tournament(self.eval_env, num)

        eval_vs = "eval_{}_".format(vs.__class__.__name__)
        wandb.log(
            {
                eval_vs + "payoff": r["payoffs"][0],
                eval_vs + "draws": r["draws"],
                eval_vs + "roundlen": r["roundlen"],
                eval_vs + "assafs": r["assafs"][0],
                eval_vs + "win_rate": r["wins"][0] / num,
            }, )

        self.logger.log("Timestep: {}, avg roundlen: {}".format(
            self.env.timestep, r["roundlen"]))
        for i in range(self.env.player_num):
            self.logger.log(
                "Agent {}:\nWins: {}, Draws: {}, Assafs: {}, Payoff: {}".
                format(
                    i,
                    r["wins"][i],
                    r["draws"],
                    r["assafs"][i],
                    r["payoffs"][i],
                ))

        self.logger.log_performance(self.env.timestep, r["payoffs"][0])
Example no. 6
        next_state, reward, done = env.step(action)
        ts = (state, action, reward, next_state, done)
        agent.feed(ts)

        train_count = timestep - (memory_init_size + norm_step)
        if train_count > 0:
            loss = agent.train()
            print('\rINFO - Step {}, loss: {}'.format(timestep, loss), end='')

        if timestep % evaluate_every == 0:
            rewards = []
            state = eval_env.reset()
            for _ in range(evaluate_num):
                action = agent.eval_step(state)
                # Step the evaluation environment (not the training env) and carry the state forward
                state, reward, done = eval_env.step(action)
                if done:
                    rewards.append(reward)
                    state = eval_env.reset()
            logger.log('\n########## Evaluation ##########')
            logger.log('Timestep: {} Average reward is {}'.format(
                timestep, np.mean(rewards)))

            # Add point to logger
            logger.add_point(x=env.timestep, y=float(np.mean(rewards)))

        # Make plot
        if timestep % save_plot_every == 0:
            logger.make_plot(save_path=figure_path + str(timestep) + '.png')

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(timestep) + '.png')
Example no. 7
# Init a Logger to plot the learning curve
logger = Logger(root_path)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    agent.save()
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        reward = 0
        for eval_episode in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)

            reward += payoffs[0]

        logger.log('\n########## Evaluation ##########')
        logger.log('Iteration: {} Average reward is {}'.format(
            episode,
            float(reward) / evaluate_num))

        # Add point to logger
        logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)

    # Make plot
    if episode % save_plot_every == 0 and episode > 0:
        logger.make_plot(save_path=figure_path + str(episode) + '.png')

# Make the final plot
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')

print('done')
Example no. 8
agent3 = RandomAgent(action_num=env.action_num)
l = []

from rlcard.utils.logger import Logger
root_path = './model_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'
logger = Logger(xlabel='iteration',
                ylabel='exploitability',
                legend='DeepCFR+_model',
                log_path=log_path,
                csv_path=csv_path)

r = utils.reward()
'''
start = time.perf_counter()
e1 = np.mean(r.computer_reward(agent0, agent2, evaluate_num*20, Process_num, eval_env))
e2 = np.mean(r.computer_reward(agent1, agent2, evaluate_num*20, Process_num, eval_env))
end = time.perf_counter()
logger.log('episode {}:{:.5f},{:.5f} test time:{}'.format(0, e1, e2, end-start))
'''

for i in range(100):
    start = time.perf_counter()
    agent0.deepCFR(i, 8)
    #agent1.train(i,8)#20*8*1*1
    #agent2.train(i,8)
    e1 = np.mean(
        r.computer_reward(agent0, agent3, evaluate_num * 50, Process_num,
                          eval_env))
Example no. 9
                    rl_loss = agents[i].train_rl()
                    sl_loss = agents[i].train_sl()
                    print(
                        '\rINFO - Agent {}, step {}, rl-loss: {}, sl-loss: {}'.
                        format(i, step_counters[i], rl_loss, sl_loss),
                        end='')

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            reward = 0
            eval_episode = 0
            for eval_episode in range(evaluate_num):
                _, payoffs = eval_env.run(is_training=False)
                reward += payoffs[0]

            logger.log('\n########## Evaluation ##########')
            logger.log('episode: {} Average reward is {}'.format(
                episode / evaluate_every,
                float(reward) / evaluate_num))

            # Add point to logger
            logger.add_point(x=episode / evaluate_every,
                             y=float(reward) / evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path + str(episode) + '.png')

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
Example no. 10
                    end='')

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            print('\n\nEpisode {}'.format(episode))

            bet_reward = 0
            change_reward = 0
            for eval_episode in range(evaluate_num):
                _, bet_reward_sum, change_reward_sum = eval_env.run(
                    is_training=False)

                bet_reward += bet_reward_sum
                change_reward += change_reward_sum

            bet_logger.log('\n########## Evaluation ##########')
            bet_logger.log(
                'Timestep: {} Average bet reward is {}. Average change reward is {}'
                .format(env.timestep,
                        float(bet_reward) / evaluate_num,
                        float(change_reward) / evaluate_num))

            # send_slack('Episode: {} Average bet reward is {}. Average change reward is {}'.format(episode, float(bet_reward)/evaluate_num, float(change_reward)/evaluate_num))

            # Add point to logger
            bet_logger.add_point(x=env.timestep,
                                 y=float(bet_reward) / evaluate_num)
            change_logger.add_point(x=env.timestep,
                                    y=float(change_reward) / evaluate_num)

        # Make plot
Example no. 11
        # Feed transitions into agent memory, and train the agent
        for i in range(env.player_num):
            for ts in trajectories[i]:
                agents[i].feed(ts)

        # extra logging
        if episode % evaluate_every == 0:
            reward = 0
            reward2 = 0
            eval_episode = 0
            for eval_episode in range(evaluate_num):
                _, payoffs = eval_env.run(is_training=False)
                reward += payoffs[0]
                reward2 += payoffs[1]

            logger.log(
                "\n\n########## Evaluation {} ##########".format(episode))
            reward_text = "{}".format(float(reward) / evaluate_num)
            reward2_text = "{}".format(float(reward2) / evaluate_num)
            info = "Timestep: {} Average reward is {}, reward2 is {}".format(
                env.timestep, reward_text, reward2_text)
            logger.log(info)

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            logger.log_performance(env.timestep,
                                   tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
Example no. 12
agent = MCCFRagent.MCCFRagent(env, isAbs=False)
from rlcard.utils.logger import Logger
root_path = './model_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'
logger = Logger(xlabel='iteration',
                ylabel='exploitability',
                legend='DeepCFR+_model',
                log_path=log_path,
                csv_path=csv_path)

l = []
r = utils.exploitability()
start = time.perf_counter()
e1 = np.mean(r.computer_exploitability(agent, evaluate_num*5, 8))
l.append([e1])
end = time.perf_counter()
logger.log('episode {}:{} test time:{}'.format(0, e1, end-start))

for i in range(800):
    agent.train()
    if (i + 1) % 32 == 0:
        start = time.perf_counter()
        e1 = np.mean(r.computer_exploitability(agent, evaluate_num*5, 8))
        l.append([e1])
        end = time.perf_counter()
        logger.log('episode {}:{} test time:{}'.format((i+1)/32, e1, end-start))
for item in l:
    print(item)

    
'''
agent1 = agent = cfr_agent.CFRAgent(env, isAbs=False)
Example no. 13
                       legend='CFR on nolimit Holdem',
                       log_path=log_reward_path,
                       csv_path=csv_reward_path)
for episode in range(episode_num):
    agent.train()
    if episode % 1000 == 0:
        print('\rIteration {}'.format(episode), end='\n')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        #agent.save() # Save model
        reward = 0
        for eval_episode in range(evaluate_num):
            his, payoffs = eval_env.run(is_training=False)
            reward += payoffs[0]

        logger_reward.log('\n########## Evaluation ##########')
        logger_reward.log('Iteration: {} Average reward is {}'.format(
            episode,
            float(reward) / evaluate_num))

        # Add point to logger
        logger_reward.add_point(x=episode, y=float(reward) / evaluate_num)
        import time
        start = time.perf_counter()
        exploitability = agent.compute_exploitability(evaluate_num)
        end = time.perf_counter()
        logger.log('episode: {} cost {:10}s ,exploitability is {}'.format(
            episode, end - start, exploitability))
        logger.add_point(x=episode, y=exploitability)
        print("\n")
    # Make plot
Example no. 14
                env.timestep += timestep
                # Feed transitions into agent memory, and train
                for ts in trajectories[0]:
                    agent.feed(ts)
            # Evaluate the performance
            reward = 0
            tasks = assign_task(evaluate_num, PROCESS_NUM)
            variables = tf.contrib.slim.get_variables(
                scope="dqn", collection=tf.GraphKeys.TRAINABLE_VARIABLES)
            variables = [var.eval() for var in variables]
            for task in tasks:
                INPUT_QUEUE.put((task, False, variables, agent.total_t))
            for _ in range(evaluate_num):
                payoffs = OUTPUT_QUEUE.get()
                reward += payoffs[0]
            logger.log('\n########## Evaluation ##########')
            logger.log('Average reward is {}'.format(
                float(reward) / evaluate_num))

        # Close files in the logger
        logger.close_files()

        # Plot the learning curve
        logger.plot('DQN_multi_process')

        # Save model
        save_dir = 'models/leduc_dqn_multi'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(save_dir, 'model'))
Example no. 15
            # Train the agent
            train_count = step_counter - (memory_init_size + norm_step)
            if train_count > 0:
                loss = agent.train()
                print('\rINFO - Step {}, hand loss : {}'.format(step_counter, loss), end='')

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            print('Episode {}'.format(episode))
            reward = 0
            for eval_episode in range(evaluate_num):
                _, _, reward_sum = eval_env.run(is_training=False)

                reward += reward_sum

            logger.log('\n########## Evaluation ##########')
            logger.log('Timestep: {} Average hand reward is {}'.format(env.timestep, float(reward)/evaluate_num))

            # Add point to logger
            logger.add_point(x=env.timestep, y=float(reward)/evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path+str(episode)+'.png')
        
        if episode % checkpoint_every == 0 and episode > 0:
            ckpt = agent.save(checkpoint_path, episode)
            print('Saved to {}'.format(ckpt))

    # Make the final plot
    logger.make_plot(save_path=figure_path+str(episode)+'.png')
Example no. 16
            reward = 0
            reward_list = []
            for eval_episode in range(evaluate_num):
                print(
                    '\rEPISODE {} - Eval {} over {} - Number of game played {} - {}'
                    .format(episode, eval_episode, evaluate_num,
                            total_game_played,
                            time_difference_good_format(seconds, time.time())),
                    end='')
                _, payoffs = eval_env.run(is_training=False)
                total_game_played += 1
                reward_list.append(payoffs[0])
                reward += payoffs[0]

            logger.log('\n########## Evaluation - Episode {} ##########'.format(episode))
            logger.log('Timestep: {} Average reward is {}'.format(env.timestep, float(reward) / evaluate_num))

            # Add point to logger
            logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path + str(episode) + '.png')
            logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                                  save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
    logger.make_plot_hist(save_path_1=figure_path + 'final_' + str(episode) + '_hist.png',
                          save_path_2=figure_path + 'final_' + str(episode) + '_freq.png',
                          reward_list=reward_list)
Example no. 17
figure_path = root_path + 'figures/'
logger = Logger(xlabel='iteration',
                ylabel='exploitability',
                legend='DeepCFR+_model',
                log_path=log_path,
                csv_path=csv_path)

#r = utils.reward()
r = utils.exploitability()
l = []
start = time.perf_counter()
e1 = np.mean(r.computer_exploitability(agent2, evaluate_num, 8))
e2 = np.mean(r.computer_exploitability(agent3, evaluate_num, 8))
l.append([e1, e2])
end = time.perf_counter()
logger.log('episode {}:{},{} test time:{}'.format(0, e1, e2, end - start))

for i in range(10):
    agent2.train(i, 8)
    agent3.train(i, 8)
    start = time.perf_counter()
    e1 = np.mean(r.computer_exploitability(agent2, evaluate_num, 8))
    e2 = np.mean(r.computer_exploitability(agent3, evaluate_num, 8))
    l.append([e1, e2])
    end = time.perf_counter()
    logger.log('episode {}:{},{} test time:{}'.format(i, e1, e2, end - start))

agent2.save()
agent3.save()
for item in l:
    print(item)