def main(cfg: omegaconf.DictConfig):
    # create the environment
    env = atari_wrappers.make_env(cfg.exp.env)
    env = gym.wrappers.Monitor(env, "recording/", force=True)
    obs = env.reset()

    # TensorBoard
    writer = SummaryWriter()
    writer.add_hparams(flatten_dict(cfg), {})
    logger.info('Hyperparams: %s', cfg)

    # create the agent
    agent = DQNAgent(env, device=cfg.train.device, summary_writer=writer, cfg=cfg)

    n_games = 0
    max_mean_40_reward = -sys.maxsize

    # Play MAX_N_GAMES games
    while n_games < cfg.train.max_episodes:
        # act epsilon-greedily
        action = agent.act_eps_greedy(obs)

        # one step on the environment
        new_obs, reward, done, _ = env.step(action)

        # add the environment feedback to the agent
        agent.add_env_feedback(obs, action, new_obs, reward, done)

        # sample and optimize. NB: the agent could wait to have enough memories
        agent.sample_and_optimize(cfg.train.batch_size)

        obs = new_obs
        if done:
            n_games += 1

            # print info about the agent and reset the stats
            agent.print_info()
            agent.reset_stats()
            obs = env.reset()

            # keep the checkpoint of the best mean reward over the last 40 games
            if agent.rewards:
                current_mean_40_reward = np.mean(agent.rewards[-40:])
                if current_mean_40_reward > max_mean_40_reward:
                    max_mean_40_reward = current_mean_40_reward
                    agent.save_model(cfg.train.best_checkpoint)

    writer.close()
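# NOTE: `flatten_dict` is a project helper assumed by the call to
# `writer.add_hparams(flatten_dict(cfg), {})` above but not shown in this listing.
# `SummaryWriter.add_hparams` only accepts a flat {str: scalar} mapping, so the
# nested omegaconf config has to be flattened first. A minimal sketch of such a
# helper (name and behaviour are assumptions, not the original implementation):
def flatten_dict(cfg, parent_key="", sep="."):
    """Flatten a nested dict/DictConfig into {'train.batch_size': 32, ...}."""
    items = {}
    for key, value in cfg.items():
        new_key = f"{parent_key}{sep}{key}" if parent_key else str(key)
        if isinstance(value, (dict, omegaconf.DictConfig)):
            # recurse into nested config sections
            items.update(flatten_dict(value, new_key, sep=sep))
        else:
            items[new_key] = value
    return items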
        summary_writer=writer, hyperparameters=DQN_HYPERPARAMS)

    n_games = 0
    n_iter = 0

    # Play MAX_N_GAMES games
    while n_games < MAX_N_GAMES:
        # act epsilon-greedily
        action = agent.act_eps_greedy(obs)

        # one step on the environment
        new_obs, reward, done, _ = env.step(action)

        # add the environment feedback to the agent
        agent.add_env_feedback(obs, action, new_obs, reward, done)

        # sample and optimize. NB: the agent could wait to have enough memories
        agent.sample_and_optimize(BATCH_SIZE)

        obs = new_obs
        if done:
            n_games += 1

            # print info about the agent and reset the stats
            agent.print_info()
            agent.reset_stats()
            obs = env.reset()

            if n_games % TEST_FREQUENCY == 0:
                print('Test mean:', utils.test_game(env, agent, 1))
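# `utils.test_game(env, agent, num_games)` is called above but not defined in
# this listing. A minimal sketch of what it might do: play `num_games` full
# episodes and return the mean total reward. The `agent.act_greedy` method name
# is an assumption; the only action method shown in these listings is
# `act_eps_greedy`, which could be used instead at the cost of exploration noise.
def test_game(env, agent, num_games):
    rewards = []
    for _ in range(num_games):
        obs = env.reset()
        done = False
        total_reward = 0.0
        while not done:
            action = agent.act_greedy(obs)  # assumed greedy action method
            obs, reward, done, _ = env.step(action)
            total_reward += reward
        rewards.append(total_reward)
    return sum(rewards) / len(rewards)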
def main():
    args = parse_args()

    # Overwrite default values
    DQN_HYPERPARAMS['epsilon_final'] = args.eps
    DQN_HYPERPARAMS['double_DQN'] = args.ddqn

    # create the environment
    # env = atari_wrappers.make_env(ENV_NAME)
    env = atari_wrappers.make_env(args.env_name)

    # Create run name with environment name and timestamp of launch
    # (and optional tag)
    run_name = args.env_name
    if args.tag != "":
        run_name += f"_{args.tag}"
    run_name += "_run_" + datetime.now().strftime("%Y%m%d_%H%M")

    if SAVE_VIDEO:
        # save the video of the games
        # env = gym.wrappers.Monitor(env, "main-"+args.env_name, force=True)
        # Save every 50th episode
        env = gym.wrappers.Monitor(
            env,
            "videos/" + args.env_name + "/run_" + datetime.now().strftime("%Y%m%d_%H%M"),  # noqa
            video_callable=lambda episode_id: episode_id % 50 == 0)

    # TensorBoard
    writer = SummaryWriter(log_dir=LOG_DIR + '/' + run_name) \
        if SUMMARY_WRITER else None

    print('Hyperparams:', DQN_HYPERPARAMS)

    # create the agent
    agent = DQNAgent(env, DQN_HYPERPARAMS, DEVICE, summary_writer=writer)

    n_games = 0
    # n_iter = 0

    # Play MAX_N_GAMES games
    while n_games < MAX_N_GAMES:
        obs = env.reset()
        done = False

        while not done:
            # act epsilon-greedily
            action = agent.act_eps_greedy(obs)

            # one step on the environment
            new_obs, reward, done, _ = env.step(action)

            # add the environment feedback to the agent
            agent.add_env_feedback(obs, action, new_obs, reward, done)

            # sample and optimize. NB: the agent could wait to have enough
            # memories
            agent.sample_and_optimize(BATCH_SIZE)

            obs = new_obs

        n_games += 1

        # print info about the agent and reset the stats
        agent.print_info()
        agent.reset_stats()

        # if n_games % TEST_FREQUENCY == 0:
        #     print('Test mean:', utils.test_game(env, agent, 1))

    if writer is not None:
        writer.close()
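# `parse_args()` is not shown in this listing. Based on the attributes used above
# (args.env_name, args.eps, args.ddqn, args.tag), it could look roughly like the
# sketch below; the exact flag names, help strings, and defaults are assumptions.
def parse_args():
    import argparse
    parser = argparse.ArgumentParser(
        description="Train a DQN agent on an Atari environment")
    parser.add_argument("--env_name", type=str, default="PongNoFrameskip-v4",
                        help="Gym Atari environment id")
    parser.add_argument("--eps", type=float, default=0.02,
                        help="final epsilon for the epsilon-greedy policy")
    parser.add_argument("--ddqn", action="store_true",
                        help="enable Double DQN target computation")
    parser.add_argument("--tag", type=str, default="",
                        help="optional tag appended to the run name")
    return parser.parse_args()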