def test_log(self):
    logger = Logger(xlabel="x", ylabel="y", legend="test",
                    log_path="./newtest/test_log.txt")
    logger.log("test text")
    # Use a context manager so the file handle is not leaked
    with open("./newtest/test_log.txt", "r") as f:
        contents = f.read()
    self.assertEqual(contents, "test text\n")
    logger.close_file()
def test_log(self):
    # The newer Logger API takes a directory, so the path should not be
    # named like a file
    log_dir = "./newtest/test_log/"
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    logger = Logger(log_dir)
    logger.log("test text")
    logger.log_performance(1, 1)
    logger.log_performance(2, 2)
    logger.log_performance(3, 3)
    logger.close_files()
    logger.plot('aaa')
def test_log(self):
    log_path = "./newtest/test_log.txt"
    log_dir = os.path.dirname(log_path)
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    logger = Logger(xlabel="x", ylabel="y", legend="test", log_path=log_path)
    logger.log("test text")
    # Close the handle before rmtree so the directory can be removed cleanly
    with open(log_path, "r") as f:
        contents = f.read()
    self.assertEqual(contents, "test text\n")
    logger.close_file()
    shutil.rmtree(log_dir)
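# A minimal sketch (not part of the test suite) contrasting the two Logger
# APIs the tests above exercise. The constructor signatures are inferred from
# the calls in these tests: the older API takes plot metadata plus file paths
# and exposes log()/add_point()/make_plot()/close_file(), while the newer API
# takes a log directory and exposes log()/log_performance()/plot()/
# close_files(). The paths and the 'sketch' label below are placeholders.
import os
import shutil

from rlcard.utils.logger import Logger

log_dir = "./newtest/sketch/"
if os.path.exists(log_dir):
    shutil.rmtree(log_dir)

logger = Logger(log_dir)                  # newer API: one directory for all outputs
logger.log("free-form text")              # goes to the text log
for step, reward in [(1, 0.1), (2, 0.3), (3, 0.2)]:
    logger.log_performance(step, reward)  # appends a (timestep, reward) row
logger.close_files()
logger.plot('sketch')                     # renders the learning curve from the CSV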
                 (env.player_num - 1))
for eval_episode in range(evaluate_num):
    print('\rEPISODE {} - Eval Random {} over {} - Number of game played {} - {}'
          .format(episode, eval_episode, evaluate_num, total_game_played,
                  time_difference_good_format(seconds, time.time())),
          end='')
    _, payoffs = eval_env.run(is_training=False)
    total_game_played += 1
    reward_random_list.append(payoffs[0])
    reward_random += payoffs[0]
    taking_list.append(eval_env.game.players[0].taking)

logger_random.log('\n########## Evaluation Against Random - Episode {} ##########'
                  .format(episode))
logger_random.log('Timestep: {} Average reward against random is {}'.format(
    env.timestep, float(reward_random) / evaluate_num))

# Add point to logger
logger_random.add_point(x=env.timestep, y=float(reward_random) / evaluate_num)

# Make plot
logger_random.make_plot(save_path=figure_path_random + str(episode) + '.png')
logger_random.make_plot_hist(
    save_path_1=figure_path_random + str(episode) + '_hist.png',
class ExperimentRunner:
    def __init__(self, env, eval_env, log_every, save_every, base_dir, config,
                 training_agent, vs_agent, feed_function, save_function):
        self.save_dir = "{}/{}".format(base_dir, datetime.now().strftime("%Y%m%d"))
        self.log_dir = os.path.join(self.save_dir, "logs/")
        self.model_dir = os.path.join(self.save_dir, "model/")
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)

        self.log_every = log_every
        self.save_every = save_every
        self.config = config

        self.env = env
        self.eval_env = eval_env
        self.agent = training_agent
        self.training_agents = [self.agent, vs_agent]
        self.env.set_agents(self.training_agents)

        self.logger = Logger(self.log_dir)
        self.logger.log("CONFIG: ")
        self.logger.log(str(config))
        self.stat_logger = YanivStatLogger(self.logger)

        self.feed_function = feed_function
        self.save_function = save_function

        self.action_space = (utils.JOINED_ACTION_SPACE
                             if config['single_step_actions']
                             else utils.ACTION_SPACE)

    def feed_game(self, agent, trajectories, player_id):
        self.feed_function(agent, trajectories[player_id])

        if self.config.get("feed_both_games"):
            # Parentheses matter here: `player_id + 1 % n` parses as
            # `player_id + (1 % n)` and indexes out of range for the last player.
            other_id = (player_id + 1) % len(self.training_agents)
            other_traj = trajectories[other_id]
            if self.training_agents[other_id].use_raw:
                self.feed_function(
                    agent,
                    list(map(lambda t: [t[0], self.action_space[t[1]], *t[2:]],
                             other_traj)))
            else:
                self.feed_function(agent, other_traj)

    def run_training(self, episode_num, eval_every, eval_vs, eval_num):
        for episode in trange(episode_num, desc="Episodes", file=sys.stdout):
            # Generate data from the environment
            trajectories, _ = self.env.run(is_training=True)
            self.stat_logger.add_game(trajectories, self.env, 0)

            self.feed_game(self.agent, trajectories, 0)
            if self.config['feed_both_agents']:
                self.feed_game(self.training_agents[1], trajectories, 1)

            if episode != 0 and episode % self.log_every == 0:
                self.stat_logger.log_stats()

            if episode != 0 and episode % self.save_every == 0:
                self.save_function(self.agent, self.model_dir)

            if episode != 0 and episode % eval_every == 0:
                self.logger.log(
                    "\n\n########## Evaluation {} ##########".format(episode))
                self.evaluate_perf(eval_vs, eval_num)

        # Final evaluation and save once training finishes
        self.evaluate_perf(eval_vs, eval_num)
        self.save_function(self.agent, self.model_dir)

    def evaluate_perf(self, eval_vs, eval_num):
        if isinstance(eval_vs, list):
            for vs in eval_vs:
                self.run_evaluation(vs, eval_num)
        else:
            self.run_evaluation(eval_vs, eval_num)

    def run_evaluation(self, vs, num):
        self.eval_env.set_agents([self.agent, vs])
        self.logger.log("eval vs {}".format(vs.__class__.__name__))
        r = tournament(self.eval_env, num)

        eval_vs = "eval_{}_".format(vs.__class__.__name__)
        wandb.log({
            eval_vs + "payoff": r["payoffs"][0],
            eval_vs + "draws": r["draws"],
            eval_vs + "roundlen": r["roundlen"],
            eval_vs + "assafs": r["assafs"][0],
            eval_vs + "win_rate": r["wins"][0] / num,
        })

        self.logger.log("Timestep: {}, avg roundlen: {}".format(
            self.env.timestep, r["roundlen"]))
        for i in range(self.env.player_num):
            self.logger.log(
                "Agent {}:\nWins: {}, Draws: {}, Assafs: {}, Payoff: {}".format(
                    i, r["wins"][i], r["draws"], r["assafs"][i], r["payoffs"][i]))

        self.logger.log_performance(self.env.timestep, r["payoffs"][0])
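# A hypothetical wiring of ExperimentRunner, assuming env/eval_env and both
# agents are already constructed. The helper functions and config keys below
# are placeholders inferred from how __init__, feed_game, and run_training
# consume them, not a verbatim setup from this project.
def example_feed(agent, trajectory):
    # Feed one player's transitions into the learning agent
    for ts in trajectory:
        agent.feed(ts)

def example_save(agent, model_dir):
    # Persist the agent; the real caller supplies its own save_function
    agent.save(model_dir)

runner = ExperimentRunner(
    env=env,
    eval_env=eval_env,
    log_every=100,
    save_every=1000,
    base_dir="yaniv_runs",
    config={
        "single_step_actions": False,
        "feed_both_agents": False,
        "feed_both_games": False,
    },
    training_agent=training_agent,
    vs_agent=vs_agent,
    feed_function=example_feed,
    save_function=example_save,
)
runner.run_training(episode_num=50000, eval_every=500,
                    eval_vs=[vs_agent], eval_num=100)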
next_state, reward, done = env.step(action)
ts = (state, action, reward, next_state, done)
agent.feed(ts)

train_count = timestep - (memory_init_size + norm_step)
if train_count > 0:
    loss = agent.train()
    print('\rINFO - Step {}, loss: {}'.format(timestep, loss), end='')

if timestep % evaluate_every == 0:
    rewards = []
    state = eval_env.reset()
    for _ in range(evaluate_num):
        action = agent.eval_step(state)
        # Step the evaluation environment (not the training one) and keep the
        # returned state so the agent does not act on a stale observation
        state, reward, done = eval_env.step(action)
        if done:
            rewards.append(reward)
            state = eval_env.reset()
    logger.log('\n########## Evaluation ##########')
    logger.log('Timestep: {} Average reward is {}'.format(
        timestep, np.mean(rewards)))
    # Add point to logger, using the same mean that was just logged
    logger.add_point(x=env.timestep, y=float(np.mean(rewards)))
    # Make plot
    if timestep % save_plot_every == 0:
        logger.make_plot(save_path=figure_path + str(timestep) + '.png')

# Make the final plot
logger.make_plot(save_path=figure_path + 'final_' + str(timestep) + '.png')
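# A minimal sketch (assumed, not from this script) of the outer loop the
# fragment above lives in: one transition per timestep, restarting the episode
# when it terminates. total_timesteps is a placeholder name.
state = env.reset()
for timestep in range(1, total_timesteps + 1):
    action = agent.step(state)                      # exploratory action
    next_state, reward, done = env.step(action)
    agent.feed((state, action, reward, next_state, done))
    # ...periodic train / evaluate / plot as in the fragment above...
    state = env.reset() if done else next_state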
# Init a Logger to plot the learning curve
logger = Logger(root_path)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    agent.save()

    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        reward = 0
        for eval_episode in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)
            reward += payoffs[0]
        logger.log('\n########## Evaluation ##########')
        logger.log('Iteration: {} Average reward is {}'.format(
            episode, float(reward) / evaluate_num))
        # Add point to logger
        logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)
        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path + str(episode) + '.png')

# Make the final plot
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
print('done')
agent3 = RandomAgent(action_num=env.action_num)
l = []

from rlcard.utils.logger import Logger
root_path = './model_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'
logger = Logger(xlabel='iteration',
                ylabel='exploitability',
                legend='DeepCFR+_model',
                log_path=log_path,
                csv_path=csv_path)

r = utils.reward()
'''
start = time.perf_counter()
e1 = np.mean(r.computer_reward(agent0, agent2, evaluate_num*20, Process_num, eval_env))
e2 = np.mean(r.computer_reward(agent1, agent2, evaluate_num*20, Process_num, eval_env))
end = time.perf_counter()
logger.log('episode {}:{:.5f},{:.5f} test time:{}'.format(0, e1, e2, end-start))
'''
for i in range(100):
    start = time.perf_counter()
    agent0.deepCFR(i, 8)
    # agent1.train(i, 8)  # 20*8*1*1
    # agent2.train(i, 8)
    e1 = np.mean(r.computer_reward(agent0, agent3, evaluate_num * 50,
                                   Process_num, eval_env))
rl_loss = agents[i].train_rl()
sl_loss = agents[i].train_sl()
print('\rINFO - Agent {}, step {}, rl-loss: {}, sl-loss: {}'.format(
    i, step_counters[i], rl_loss, sl_loss), end='')

# Evaluate the performance. Play with random agents.
if episode % evaluate_every == 0:
    reward = 0
    for eval_episode in range(evaluate_num):
        _, payoffs = eval_env.run(is_training=False)
        reward += payoffs[0]
    logger.log('\n########## Evaluation ##########')
    logger.log('episode: {} Average reward is {}'.format(
        episode / evaluate_every, float(reward) / evaluate_num))
    # Add point to logger
    logger.add_point(x=episode / evaluate_every,
                     y=float(reward) / evaluate_num)
    # Make plot
    if episode % save_plot_every == 0 and episode > 0:
        logger.make_plot(save_path=figure_path + str(episode) + '.png')

# Make the final plot
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
      end='')

# Evaluate the performance. Play with random agents.
if episode % evaluate_every == 0:
    print('\n\nEpisode {}'.format(episode))
    bet_reward = 0
    change_reward = 0
    for eval_episode in range(evaluate_num):
        _, bet_reward_sum, change_reward_sum = eval_env.run(is_training=False)
        bet_reward += bet_reward_sum
        change_reward += change_reward_sum
    bet_logger.log('\n########## Evaluation ##########')
    bet_logger.log(
        'Timestep: {} Average bet reward is {}. Average change reward is {}'
        .format(env.timestep, float(bet_reward) / evaluate_num,
                float(change_reward) / evaluate_num))
    # send_slack('Episode: {} Average bet reward is {}. Average change reward is {}'.format(episode, float(bet_reward)/evaluate_num, float(change_reward)/evaluate_num))

    # Add point to logger
    bet_logger.add_point(x=env.timestep, y=float(bet_reward) / evaluate_num)
    change_logger.add_point(x=env.timestep,
                            y=float(change_reward) / evaluate_num)

    # Make plot
# Feed transitions into agent memory, and train the agent
for i in range(env.player_num):
    for ts in trajectories[i]:
        agents[i].feed(ts)

# extra logging
if episode % evaluate_every == 0:
    reward = 0
    reward2 = 0
    for eval_episode in range(evaluate_num):
        _, payoffs = eval_env.run(is_training=False)
        reward += payoffs[0]
        reward2 += payoffs[1]
    logger.log("\n\n########## Evaluation {} ##########".format(episode))
    reward_text = "{}".format(float(reward) / evaluate_num)
    reward2_text = "{}".format(float(reward2) / evaluate_num)
    info = "Timestep: {} Average reward is {}, reward2 is {}".format(
        env.timestep, reward_text, reward2_text)
    logger.log(info)

# Evaluate the performance. Play with random agents.
if episode % evaluate_every == 0:
    logger.log_performance(env.timestep,
                           tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
agent = MCCFRagent.MCCFRagent(env, isAbs=False)

from rlcard.utils.logger import Logger
root_path = './model_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'
logger = Logger(xlabel='iteration',
                ylabel='exploitability',
                legend='DeepCFR+_model',
                log_path=log_path,
                csv_path=csv_path)

l = []
r = utils.exploitability()
start = time.perf_counter()
e1 = np.mean(r.computer_exploitability(agent, evaluate_num * 5, 8))
l.append([e1])
end = time.perf_counter()
logger.log('episode {}:{} test time:{}'.format(0, e1, end - start))

for i in range(800):
    agent.train()
    if (i + 1) % 32 == 0:
        start = time.perf_counter()
        e1 = np.mean(r.computer_exploitability(agent, evaluate_num * 5, 8))
        l.append([e1])
        end = time.perf_counter()
        logger.log('episode {}:{} test time:{}'.format(
            (i + 1) / 32, e1, end - start))

for item in l:
    print(item)
'''
agent1 = agent = cfr_agent.CFRAgent(env, isAbs=False)
                legend='CFR on nolimit Holdem',
                log_path=log_reward_path,
                csv_path=csv_reward_path)

import time

for episode in range(episode_num):
    agent.train()
    if episode % 1000 == 0:
        print('\rIteration {}'.format(episode), end='\n')

    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        # agent.save()  # Save model
        reward = 0
        for eval_episode in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)
            reward += payoffs[0]
        logger_reward.log('\n########## Evaluation ##########')
        logger_reward.log('Iteration: {} Average reward is {}'.format(
            episode, float(reward) / evaluate_num))
        # Add point to logger
        logger_reward.add_point(x=episode, y=float(reward) / evaluate_num)

        start = time.perf_counter()
        exploitability = agent.compute_exploitability(evaluate_num)
        end = time.perf_counter()
        logger.log('episode: {} cost {:10}s, exploitability is {}'.format(
            episode, end - start, exploitability))
        logger.add_point(x=episode, y=exploitability)
        print("\n")

# Make plot
env.timestep += timestep

# Feed transitions into agent memory, and train
for ts in trajectories[0]:
    agent.feed(ts)

# Evaluate the performance
reward = 0
tasks = assign_task(evaluate_num, PROCESS_NUM)
variables = tf.contrib.slim.get_variables(
    scope="dqn", collection=tf.GraphKeys.TRAINABLE_VARIABLES)
variables = [var.eval() for var in variables]
for task in tasks:
    INPUT_QUEUE.put((task, False, variables, agent.total_t))
for _ in range(evaluate_num):
    payoffs = OUTPUT_QUEUE.get()
    reward += payoffs[0]
logger.log('\n########## Evaluation ##########')
logger.log('Average reward is {}'.format(float(reward) / evaluate_num))

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('DQN_multi_process')

# Save model
save_dir = 'models/leduc_dqn_multi'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
saver = tf.train.Saver()
saver.save(sess, os.path.join(save_dir, 'model'))
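# A hypothetical sketch of the worker side of the INPUT_QUEUE/OUTPUT_QUEUE
# protocol used above: each worker pulls a (num_games, is_training, variables,
# total_t) tuple, loads the weights into its local agent, plays the games, and
# pushes one payoff vector per game. The tuple layout mirrors what the main
# process enqueues; make_env, make_agent, and load_variables are assumed
# helpers, not functions defined in this script.
def eval_worker(input_queue, output_queue, make_env, make_agent):
    env = make_env()
    agent = make_agent()
    env.set_agents([agent, agent])
    while True:
        task = input_queue.get()
        if task is None:                 # sentinel used to shut the worker down
            break
        num_games, is_training, variables, total_t = task
        agent.load_variables(variables)  # hypothetical weight-loading helper
        for _ in range(num_games):
            _, payoffs = env.run(is_training=is_training)
            output_queue.put(payoffs)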
# Train the agent
train_count = step_counter - (memory_init_size + norm_step)
if train_count > 0:
    loss = agent.train()
    print('\rINFO - Step {}, hand loss: {}'.format(step_counter, loss), end='')

# Evaluate the performance. Play with random agents.
if episode % evaluate_every == 0:
    print('Episode {}'.format(episode))
    reward = 0
    for eval_episode in range(evaluate_num):
        _, _, reward_sum = eval_env.run(is_training=False)
        reward += reward_sum
    logger.log('\n########## Evaluation ##########')
    logger.log('Timestep: {} Average hand reward is {}'.format(
        env.timestep, float(reward) / evaluate_num))
    # Add point to logger
    logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)
    # Make plot
    if episode % save_plot_every == 0 and episode > 0:
        logger.make_plot(save_path=figure_path + str(episode) + '.png')
    if episode % checkpoint_every == 0 and episode > 0:
        ckpt = agent.save(checkpoint_path, episode)
        print('Saved to {}'.format(ckpt))

# Make the final plot; prefix it so it does not overwrite the periodic plot
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
reward = 0
reward_list = []
for eval_episode in range(evaluate_num):
    print('\rEPISODE {} - Eval {} over {} - Number of game played {} - {}'
          .format(episode, eval_episode, evaluate_num, total_game_played,
                  time_difference_good_format(seconds, time.time())),
          end='')
    _, payoffs = eval_env.run(is_training=False)
    total_game_played += 1
    reward_list.append(payoffs[0])
    reward += payoffs[0]

logger.log('\n########## Evaluation - Episode {} ##########'.format(episode))
logger.log('Timestep: {} Average reward is {}'.format(
    env.timestep, float(reward) / evaluate_num))

# Add point to logger
logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)

# Make plot
if episode % save_plot_every == 0 and episode > 0:
    logger.make_plot(save_path=figure_path + str(episode) + '.png')
    logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                          save_path_2=figure_path + str(episode) + '_freq.png',
                          reward_list=reward_list)

# Make the final plots; prefix them so they do not overwrite the periodic ones
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
logger.make_plot_hist(save_path_1=figure_path + 'final_' + str(episode) + '_hist.png',
                      save_path_2=figure_path + 'final_' + str(episode) + '_freq.png',
                      reward_list=reward_list)
figure_path = root_path + 'figures/'
logger = Logger(xlabel='iteration',
                ylabel='exploitability',
                legend='DeepCFR+_model',
                log_path=log_path,
                csv_path=csv_path)

# r = utils.reward()
r = utils.exploitability()
l = []

start = time.perf_counter()
e1 = np.mean(r.computer_exploitability(agent2, evaluate_num, 8))
e2 = np.mean(r.computer_exploitability(agent3, evaluate_num, 8))
l.append([e1, e2])
end = time.perf_counter()
logger.log('episode {}:{},{} test time:{}'.format(0, e1, e2, end - start))

for i in range(10):
    agent2.train(i, 8)
    agent3.train(i, 8)
    start = time.perf_counter()
    e1 = np.mean(r.computer_exploitability(agent2, evaluate_num, 8))
    e2 = np.mean(r.computer_exploitability(agent3, evaluate_num, 8))
    l.append([e1, e2])
    end = time.perf_counter()
    logger.log('episode {}:{},{} test time:{}'.format(i, e1, e2, end - start))

agent2.save()
agent3.save()
for item in l:
    print(item)