def main(cfg: omegaconf.DictConfig):
	"""Train a DQN agent on the environment named in ``cfg.exp.env``.

	Runs an eps-greedy interaction loop for ``cfg.train.max_episodes``
	episodes, optimizing after every step, and saves a checkpoint to
	``cfg.train.best_checkpoint`` whenever the trailing-40-episode mean
	reward reaches a new best.

	Args:
		cfg: Hydra/OmegaConf config with ``exp`` and ``train`` sections.
	"""
	# create the environment
	env = atari_wrappers.make_env(cfg.exp.env)
	env = gym.wrappers.Monitor(env, "recording/", force=True)
	obs = env.reset()

	# TensorBoard
	writer = SummaryWriter()
	writer.add_hparams(flatten_dict(cfg), {})
	# BUG FIX: the original passed cfg as a lazy %-arg to a message with no
	# placeholder, so the hyperparams were never rendered in the log line.
	logger.info('Hyperparams: %s', cfg)

	# create the agent
	agent = DQNAgent(env, device=cfg.train.device, summary_writer=writer, cfg=cfg)

	n_games = 0
	# Best trailing-40 mean reward seen so far; start at the lowest
	# possible value so the first completed episode becomes the baseline.
	max_mean_40_reward = -sys.maxsize

	# Play MAX_N_GAMES games
	while n_games < cfg.train.max_episodes:
		# act eps-greedily
		action = agent.act_eps_greedy(obs)

		# one step on the environment
		new_obs, reward, done, _ = env.step(action)

		# add the environment feedback to the agent
		agent.add_env_feedback(obs, action, new_obs, reward, done)

		# sample and optimize NB: the agent could wait to have enough memories
		agent.sample_and_optimize(cfg.train.batch_size)

		obs = new_obs
		if done:
			n_games += 1
			agent.print_info()
			agent.reset_stats()
			obs = env.reset()
			if agent.rewards:
				current_mean_40_reward = np.mean(agent.rewards[-40:])
				if current_mean_40_reward > max_mean_40_reward:
					# BUG FIX: record the new best before saving; the original
					# never updated max_mean_40_reward, so every episode after
					# the first beat -sys.maxsize and overwrote the "best"
					# checkpoint regardless of quality.
					max_mean_40_reward = current_mean_40_reward
					agent.save_model(cfg.train.best_checkpoint)
	writer.close()
Beispiel #2
0
                     summary_writer=writer,
                     hyperparameters=DQN_HYPERPARAMS)

    n_games = 0
    n_iter = 0

    # Play MAX_N_GAMES games
    while n_games < MAX_N_GAMES:
        # act greedly
        action = agent.act_eps_greedy(obs)

        # one step on the environment
        new_obs, reward, done, _ = env.step(action)

        # add the environment feedback to the agent
        agent.add_env_feedback(obs, action, new_obs, reward, done)

        # sample and optimize NB: the agent could wait to have enough memories
        agent.sample_and_optimize(BATCH_SIZE)

        obs = new_obs
        if done:
            n_games += 1

            # print info about the agent and reset the stats
            agent.print_info()
            agent.reset_stats()

            if n_games % TEST_FREQUENCY == 0:
                print('Test mean:', utils.test_game(env, agent, 1))
Beispiel #3
0
def main():
    """Train a DQN agent on the Atari environment given on the command line.

    Parses CLI args, builds the (optionally video-recorded) environment and
    an optional TensorBoard writer, then runs an eps-greedy training loop
    for MAX_N_GAMES full episodes, optimizing after every step.
    """
    args = parse_args()

    # Overwrite default values
    DQN_HYPERPARAMS['epsilon_final'] = args.eps
    DQN_HYPERPARAMS['double_DQN'] = args.ddqn

    # create the environment
    env = atari_wrappers.make_env(args.env_name)

    # Create run name with environment name and timestamp of launch
    # (and optional tag)
    run_name = args.env_name
    if args.tag != "":
        run_name += f"_{args.tag}"
    run_name += "_run_" + datetime.now().strftime("%Y%m%d_%H%M")

    if SAVE_VIDEO:
        # save the video of the games, keeping every 50th episode only
        env = gym.wrappers.Monitor(
            env,
            "videos/" + args.env_name + "/run_" +
            datetime.now().strftime("%Y%m%d_%H%M"),  # noqa
            video_callable=lambda episode_id: episode_id % 50 == 0)

    # TensorBoard (optional: writer stays None when SUMMARY_WRITER is falsy)
    writer = SummaryWriter(log_dir=LOG_DIR+'/'+run_name) \
        if SUMMARY_WRITER else None

    print('Hyperparams:', DQN_HYPERPARAMS)

    # create the agent
    agent = DQNAgent(env, DQN_HYPERPARAMS, DEVICE, summary_writer=writer)

    n_games = 0

    # Play MAX_N_GAMES games
    while n_games < MAX_N_GAMES:

        obs = env.reset()
        done = False

        while not done:

            # act eps-greedily
            action = agent.act_eps_greedy(obs)

            # one step on the environment
            new_obs, reward, done, _ = env.step(action)

            # add the environment feedback to the agent
            agent.add_env_feedback(obs, action, new_obs, reward, done)

            # sample and optimize NB: the agent could wait to have enough
            # memories
            agent.sample_and_optimize(BATCH_SIZE)

            obs = new_obs

        n_games += 1

        # print info about the agent and reset the stats
        agent.print_info()
        agent.reset_stats()

    # BUG FIX: writer is None when SUMMARY_WRITER is disabled; the original
    # unconditional writer.close() raised AttributeError at end of training.
    if writer is not None:
        writer.close()