def test_log(self):
    log_dir = "./newtest/test_log.txt"
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    logger = Logger(log_dir)
    logger.log("test text")
    logger.log_performance(1, 1)
    logger.log_performance(2, 2)
    logger.log_performance(3, 3)
    logger.close_files()
    logger.plot('aaa')
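The test method above relies on names imported at module level; a minimal sketch of that setup, assuming the Logger import path used by older RLCard releases (rlcard.utils.logger) and a hypothetical TestLogger test-case class, might look like this:

import os
import shutil
import unittest

from rlcard.utils.logger import Logger  # assumed path; some releases expose it as rlcard.utils.Logger


class TestLogger(unittest.TestCase):
    """Hypothetical enclosing test case; test_log above would be a method of this class."""


if __name__ == '__main__':
    unittest.main()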
           env.game.num_players, env.game.num_cards, episode_num))
# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0],
                               episode=episode)

# Save model
save_dir = 'models/mocsar_dqn_ra_pytorch'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = agent.get_state_dict()
logger.log('\n########## Pytorch Save model ##########')
logger.log('\n' + str(state_dict.keys()))
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))

# Close files in the logger
logger.close_files()

# Plot the learning curve
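As a usage note, the file written by torch.save above is an ordinary PyTorch checkpoint, so it can be inspected or reloaded later with torch.load alone; a minimal sketch, reusing the save_dir from the script above:

import os

import torch

# Reload the checkpoint written by the training script above.
checkpoint_path = os.path.join('models/mocsar_dqn_ra_pytorch', 'model.pth')
state_dict = torch.load(checkpoint_path, map_location='cpu')

# These are the same keys that were logged just before torch.save() during training.
print(state_dict.keys())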
state = env.reset()
for timestep in range(timesteps):
    action = agent.step(state)
    next_state, reward, done = env.step(action)
    ts = (state, action, reward, next_state, done)
    agent.feed(ts)
    # Advance to the next state, starting a fresh episode when the current one ends
    state = env.reset() if done else next_state

    if timestep % evaluate_every == 0:
        # Evaluate on the separate evaluation environment
        rewards = []
        eval_state = eval_env.reset()
        for _ in range(evaluate_num):
            action, _ = agent.eval_step(eval_state)
            eval_state, reward, done = eval_env.step(action)
            if done:
                rewards.append(reward)
                eval_state = eval_env.reset()
        logger.log_performance(env.timestep, np.mean(rewards))

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('DQN')

# Save model
save_dir = 'models/uno_single_dqn'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
saver = tf.train.Saver()
saver.save(sess, os.path.join(save_dir, 'model'))
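For completeness, a TensorFlow 1.x checkpoint saved with tf.train.Saver can later be restored into a session whose graph has been rebuilt by the same environment/agent construction code; a minimal sketch (the graph-building step itself is omitted here and must run before the restore):

import os

import tensorflow as tf

save_dir = 'models/uno_single_dqn'

with tf.Session() as sess:
    # Rebuild the same graph (environment and DQN agent construction) before this
    # point; tf.train.Saver can only restore variables that already exist in the graph.
    saver = tf.train.Saver()
    saver.restore(sess, os.path.join(save_dir, 'model'))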