def test_log(self):
    log_dir = "./newtest/test_log.txt"
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    logger = Logger(log_dir)
    logger.log("test text")
    logger.log_performance(1, 1)
    logger.log_performance(2, 2)
    logger.log_performance(3, 3)
    logger.close_files()
    logger.plot('aaa')
# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')
for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0], episode=episode)

# Save model
save_dir = 'models/mocsar_dqn_ra_pytorch'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = agent.get_state_dict()
logger.log('\n########## Pytorch Save model ##########')
logger.log('\n' + str(state_dict.keys()))
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('DQN RA PyTorch')
           env.game.num_players, env.game.num_cards, episode_num))
# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')
for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0], episode=episode)

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('DQN RA')

# Save model
save_dir = 'models/mocsar_dqn_ra'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
saver = tf.compat.v1.train.Saver()
saver.save(sess, os.path.join(save_dir, 'model'))
for episode in range(episode_num):

    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('NFSP')

# Save model
save_dir = 'models/limit_holdem_nfsp'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
saver = tf.train.Saver()
saver.save(sess, os.path.join(save_dir, 'model'))
# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('DQN')

# Save model
save_dir = 'models/gin_rummy_dqn'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
saver = tf.train.Saver()
saver.save(sess, os.path.join(save_dir, 'model'))
log_dir = './experiments/leduc_holdem_br_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
opponent = CFRAgent(env)
# opponent = RandomAgent(action_num=env.action_num)
# opponent.load()  # If we have saved model, we first load the model
# agent = RandomAgent(action_num=env.action_num)
agent = BRAgent(eval_env, opponent)
# agent = CFRAgent(env)
eval_env.set_agents([agent, opponent])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    opponent.train()
    # agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play against the CFR opponent.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('BR')
# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load()  # If we have saved model, we first load the model

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('CFR')
for episode in range(episode_num):

    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0], episode=episode)

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('NFSP Torch')

# Save model
save_dir = 'models/mocsar_nfsp_pytorch'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = {}
for agent in agents:
    state_dict.update(agent.get_state_dict())
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))
for episode in range(episode_num):

    # First sample a policy for the episode
    agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0], episode=episode)

# Save model
save_dir = 'models/mocsar_nfsp_pytorch_ra'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = agent.get_state_dict()
logger.log('\n########## Pytorch Save model ##########')
logger.log('\n' + str(state_dict.keys()))
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('NFSP Torch Rule Agent')