def test_make_plot(self):
    logger = Logger(xlabel="x", ylabel="y", legend="test")
    for x in range(10):
        logger.add_point(x=x, y=x * x)
    self.assertEqual(9 * 9, logger.ys[9])
    save_path = './newtest/test.png'
    save_dir = os.path.dirname(save_path)
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    logger.make_plot(save_path=save_path)
    shutil.rmtree(save_dir)
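# For context, a minimal sketch of the Logger interface exercised by this test and by
# the training scripts below. This is an illustrative assumption, not the actual
# rlcard implementation; it only fleshes out the calls that appear in these snippets
# (add_point, log, make_plot). Note that some snippets construct Logger with a root
# path instead of keyword labels; this sketch only models the keyword form.
import os
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


class Logger(object):
    ''' Collect (x, y) points and render them as a learning curve. '''

    def __init__(self, xlabel='', ylabel='', legend='', log_path=None):
        self.xlabel = xlabel
        self.ylabel = ylabel
        self.legend = legend
        self.log_path = log_path
        self.xs = []
        self.ys = []

    def add_point(self, x=None, y=None):
        # Record one point of the learning curve
        self.xs.append(x)
        self.ys.append(y)

    def log(self, text):
        # Print, and append to the log file when one was configured
        print(text)
        if self.log_path is not None:
            with open(self.log_path, 'a') as f:
                f.write(text + '\n')

    def make_plot(self, save_path=''):
        # Create the target directory if needed, then save the curve
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)
        fig, ax = plt.subplots()
        ax.plot(self.xs, self.ys, label=self.legend)
        ax.set_xlabel(self.xlabel)
        ax.set_ylabel(self.ylabel)
        ax.legend()
        fig.savefig(save_path)
        plt.close(fig)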
def train():
    env = rlcard.make('mahjong', {'allow_step_back': True})
    # env = rlcard.make('mahjong')

    # Set the number of iterations and how frequently we evaluate/save the plot
    evaluate_every = 100
    save_plot_every = 1000
    evaluate_num = 10000
    episode_num = 100000

    # The paths for saving the logs and learning curves
    root_path = './experiments/mahjong_cfr_result/'
    log_path = root_path + 'log.txt'
    csv_path = root_path + 'performance.csv'
    figure_path = root_path + 'figures/'

    # Set a global seed
    set_global_seed(0)

    # Initialize the CFR agent
    agent = MCCFRAgent(env)

    # Init a Logger to plot the learning curve
    logger = Logger(root_path)

    for episode in range(episode_num + 1):
        agent.train()
        print('\rIteration {}'.format(episode), end='')
        if episode % 5000 == 0:
            agent.save(episode)

        # # Evaluate the performance. Play with NFSP agents.
        # if episode % evaluate_every == 0:
        #     reward = 0
        #     for eval_episode in range(evaluate_num):
        #         _, payoffs = eval_env.run(is_training=False)
        #         reward += payoffs[0]
        #
        #     logger.log('\n########## Evaluation ##########')
        #     logger.log('Iteration: {} Average reward is {}'.format(episode, float(reward) / evaluate_num))
        #
        #     # Add point to logger
        #     logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)
        #
        #     # Make plot
        #     if episode % save_plot_every == 0 and episode > 0:
        #         logger.make_plot(save_path=figure_path + str(episode) + '.png')

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
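# Hedged usage sketch: assuming this file is a standalone script (as the references to
# rlcard, set_global_seed, MCCFRAgent, and Logger above suggest), the usual entry
# point would be:
if __name__ == '__main__':
    train()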
reward = 0
reward_list = []
for eval_episode in range(evaluate_num):
    print('\rEPISODE {} - Eval {} of {} - Number of games played {} - {}'.format(
        episode, eval_episode, evaluate_num, total_game_played,
        time_difference_good_format(seconds, time.time())), end='')
    _, payoffs = eval_env.run(is_training=False)
    total_game_played += 1
    reward_list.append(payoffs[0])
    reward += payoffs[0]

logger.log('\n########## Evaluation - Episode {} ##########'.format(episode))
logger.log('Timestep: {} Average reward is {}'.format(env.timestep, float(reward) / evaluate_num))

# Add point to logger
logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)

# Make plot
if episode % save_plot_every == 0 and episode > 0:
    logger.make_plot(save_path=figure_path + str(episode) + '.png')
    logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                          save_path_2=figure_path + str(episode) + '_freq.png',
                          reward_list=reward_list)

# Make the final plot (use the 'final_' prefix for the histograms too, so they do not
# overwrite the periodic ones saved above)
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
logger.make_plot_hist(save_path_1=figure_path + 'final_' + str(episode) + '_hist.png',
                      save_path_2=figure_path + 'final_' + str(episode) + '_freq.png',
                      reward_list=reward_list)
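# make_plot_hist is not shown in these snippets; a plausible standalone sketch, with
# the semantics guessed from the file names ('_hist.png' for a histogram, '_freq.png'
# for a frequency bar chart). On the real Logger it would be a method; taking_list is
# accepted but left unplotted here.
import collections
import os
import matplotlib.pyplot as plt


def make_plot_hist(save_path_1='', save_path_2='', reward_list=None, taking_list=None):
    for path in (save_path_1, save_path_2):
        save_dir = os.path.dirname(path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)

    # Histogram of the per-game rewards from this evaluation round
    fig, ax = plt.subplots()
    ax.hist(reward_list, bins=30)
    ax.set_xlabel('reward')
    ax.set_ylabel('count')
    fig.savefig(save_path_1)
    plt.close(fig)

    # Frequency of each distinct reward value
    counts = collections.Counter(reward_list)
    fig, ax = plt.subplots()
    ax.bar(list(counts.keys()), list(counts.values()))
    ax.set_xlabel('reward')
    ax.set_ylabel('frequency')
    fig.savefig(save_path_2)
    plt.close(fig)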
taking_list.append(eval_env.game.players[0].taking)

logger_random.log('\n########## Evaluation Against Random - Episode {} ##########'.format(episode))
logger_random.log('Timestep: {} Average reward against random is {}'.format(
    env.timestep, float(reward_random) / evaluate_num))

# Add point to logger
logger_random.add_point(x=env.timestep, y=float(reward_random) / evaluate_num)

# Make plot
logger_random.make_plot(save_path=figure_path_random + str(episode) + '.png')
logger_random.make_plot_hist(save_path_1=figure_path_random + str(episode) + '_hist.png',
                             save_path_2=figure_path_random + str(episode) + '_freq.png',
                             reward_list=reward_random_list,
                             taking_list=taking_list)

# Evaluate against the last saved agent
reward_opponent = 0
reward_opponent_list = []
taking_list = []
eval_env.set_agents([agent] + [opponent_agent] * (env.player_num - 1))
for eval_episode in range(evaluate_num):
    print('\rEPISODE {} - Eval Opponent {} of {} - Number of games played {} - {}'
next_state, reward, done = env.step(action)
ts = (state, action, reward, next_state, done)
agent.feed(ts)

train_count = timestep - (memory_init_size + norm_step)
if train_count > 0:
    loss = agent.train()
    print('\rINFO - Step {}, loss: {}'.format(timestep, loss), end='')

if timestep % evaluate_every == 0:
    rewards = []
    state = eval_env.reset()
    for _ in range(evaluate_num):
        action = agent.eval_step(state)
        # Step the evaluation environment (not the training env), and keep the
        # returned state so the agent acts on the current observation
        state, reward, done = eval_env.step(action)
        if done:
            rewards.append(reward)
            state = eval_env.reset()
    logger.log('\n########## Evaluation ##########')
    logger.log('Timestep: {} Average reward is {}'.format(
        timestep, np.mean(rewards)))

    # Add point to logger (the same average that was just logged)
    logger.add_point(x=env.timestep, y=np.mean(rewards))

    # Make plot
    if timestep % save_plot_every == 0:
        logger.make_plot(save_path=figure_path + str(timestep) + '.png')

# Make the final plot
logger.make_plot(save_path=figure_path + 'final_' + str(timestep) + '.png')
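# The evaluation block above can be factored into a helper. A minimal sketch under the
# same assumptions as the snippet (eval_env.reset() returns a state, eval_env.step()
# returns (state, reward, done), agent.eval_step() returns an action); note that it
# counts steps, not completed games, exactly like the loop it mirrors:
import numpy as np


def evaluate(agent, eval_env, num_steps):
    ''' Step the evaluation env for num_steps and average the terminal rewards. '''
    rewards = []
    state = eval_env.reset()
    for _ in range(num_steps):
        action = agent.eval_step(state)
        state, reward, done = eval_env.step(action)
        if done:
            rewards.append(reward)
            state = eval_env.reset()
    return np.mean(rewards)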
def test_make_plot(self):
    logger = Logger(xlabel="x", ylabel="y", legend="test")
    for x in range(10):
        logger.add_point(x=x, y=x * x)
    self.assertEqual(9 * 9, logger.ys[9])
    logger.make_plot(save_path='./newtest/test.png')
bet_logger.log('Timestep: {} Average bet reward is {}. Average change reward is {}'.format(
    env.timestep, float(bet_reward) / evaluate_num, float(change_reward) / evaluate_num))
# send_slack('Episode: {} Average bet reward is {}. Average change reward is {}'.format(episode, float(bet_reward)/evaluate_num, float(change_reward)/evaluate_num))

# Add point to logger
bet_logger.add_point(x=env.timestep, y=float(bet_reward) / evaluate_num)
change_logger.add_point(x=env.timestep, y=float(change_reward) / evaluate_num)

# Make plot
if episode % save_plot_every == 0 and episode > 0:
    bet_logger.make_plot(save_path=figure_path + 'bet/' + str(episode) + '.png')
    change_logger.make_plot(save_path=figure_path + 'change/' + str(episode) + '.png')

if episode % checkpoint_every == 0 and episode > 0:
    bet_path, change_path = agent.save(checkpoint_path, episode)
    print('Saved to {}, {}'.format(bet_path, change_path))

# Make the final plot
bet_logger.make_plot(save_path=figure_path + 'bet/' + str(episode) + '.png')
change_logger.make_plot(save_path=figure_path + 'change/' + str(episode) + '.png')
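# Note: the plots above are written into 'bet/' and 'change/' subdirectories. If the
# Logger does not create directories itself, a guard like the following (assuming the
# figure_path variable from the snippet) avoids a failed save:
import os

for sub in ('bet', 'change'):
    os.makedirs(os.path.join(figure_path, sub), exist_ok=True)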
trajectories, _ = env.run(is_training=True)

# Feed transitions into agent memory, and train
for ts in trajectories[0]:
    agent.feed(ts)
    step_counter += 1

    # Train the agent
    if step_counter > memory_init_size + norm_step:
        agent.train()

# Evaluate the performance
if episode % evaluate_every == 0:
    reward = 0
    for eval_episode in range(evaluate_num):
        _, payoffs = env.run(is_training=False)
        reward += payoffs[0]

    logger.log('\n########## Evaluation ##########')
    logger.log('Episode: {} Average reward is {}'.format(episode, float(reward) / evaluate_num))

    # Add point to logger
    logger.add_point(x=episode, y=float(reward) / evaluate_num)

    # Make plot
    if episode % save_plot_every == 0 and episode > 0:
        logger.make_plot(save_path='./experiments/blackjack_dqn_result/' + str(episode) + '.png')

# Make the final plot
logger.make_plot(save_path='./experiments/blackjack_dqn_result/' + 'final_' + str(episode) + '.png')
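# For completeness, a hedged sketch of the state this loop body assumes. The variable
# names come from the snippet; the values and the set_global_seed import path are
# illustrative placeholders, not the original configuration:
import rlcard
from rlcard.utils.utils import set_global_seed  # assumed import path

env = rlcard.make('blackjack')
set_global_seed(0)
evaluate_every = 100      # evaluate every N episodes
save_plot_every = 1000    # save an intermediate learning curve every N episodes
evaluate_num = 1000       # number of evaluation games per evaluation
memory_init_size = 100    # transitions to collect before training starts
norm_step = 100           # extra steps reserved for state normalization
step_counter = 0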