def play_func(params, net, cuda, exp_queue):
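    # Intended to run in a separate "play" process: it steps the environment
    # and pushes every transition into exp_queue for the training process to
    # consume. common, atari_wrappers, BATCH_MUL and EpisodeEnded are assumed
    # to be defined in the enclosing module.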
    env = atari_wrappers.make_atari(params.env_name,
                                    skip_noop=True,
                                    skip_maxskip=True)
    env = atari_wrappers.wrap_deepmind(env,
                                       pytorch_img=True,
                                       frame_stack=True,
                                       frame_stack_count=2)
    env.seed(common.SEED)
    device = torch.device("cuda" if cuda else "cpu")

    selector = ptan.actions.EpsilonGreedyActionSelector(
        epsilon=params.epsilon_start)
    epsilon_tracker = common.EpsilonTracker(selector, params)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
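    # ExperienceSourceFirstLast yields compressed transitions: namedtuples of
    # (first state, action, discounted reward, last state).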
    exp_source = ptan.experience.ExperienceSourceFirstLast(env,
                                                           agent,
                                                           gamma=params.gamma)

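    # Play loop: decay epsilon (frame_idx is scaled by BATCH_MUL to keep the
    # schedule in step with the trainer's enlarged batches), push each
    # transition into the queue, and report finished episodes via EpisodeEnded.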
    for frame_idx, exp in enumerate(exp_source):
        epsilon_tracker.frame(frame_idx / BATCH_MUL)
        exp_queue.put(exp)
        for reward, steps in exp_source.pop_rewards_steps():
            exp_queue.put(EpisodeEnded(reward, steps, selector.epsilon))
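# A minimal sketch of how a play function like this can be driven from the
# main process. The helper name run_play_process, the queue size and the
# EpisodeEnded handling are assumptions for illustration (EpisodeEnded is
# treated as a namedtuple with reward, steps and epsilon fields); the real
# training harness is not shown in this example.
import torch.multiprocessing as mp

def run_play_process(params, net, cuda):
    # NOTE: sharing a CUDA model between processes requires the "spawn"
    # start method (mp.set_start_method("spawn")).
    exp_queue = mp.Queue(maxsize=BATCH_MUL * 2)
    proc = mp.Process(target=play_func,
                      args=(params, net, cuda, exp_queue))
    proc.start()
    while True:
        exp = exp_queue.get()
        if isinstance(exp, EpisodeEnded):
            print(f"episode done: reward={exp.reward:.1f}, "
                  f"steps={exp.steps}, eps={exp.epsilon:.2f}")
        else:
            pass  # e.g. append exp to the replay buffer and run a train step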
Example #2
        "--params",
        default="egreedy",
        choices=list(HYPERPARAMS.keys()),
        help="Parameters, default=egreedy",
    )
    args = parser.parse_args()
    params = HYPERPARAMS[args.params]
    device = torch.device("cuda" if args.cuda else "cpu")

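    # Create N_ENVS copies of the wrapped Atari environment; stepping several
    # environments per iteration speeds up experience collection.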
    envs = []
    for _ in range(N_ENVS):
        env = atari_wrappers.make_atari(params.env_name,
                                        skip_noop=True,
                                        skip_maxskip=True)
        env = atari_wrappers.wrap_deepmind(env,
                                           pytorch_img=True,
                                           frame_stack=True)
        envs.append(env)

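    # Greedy (argmax) action selection by default; the "egreedy" configuration
    # swaps in an epsilon-greedy selector with a decay schedule, while NoisyNet
    # explores through learned parameter noise and keeps the argmax selector.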
    epsilon_tracker = None
    selector = ptan.actions.ArgmaxActionSelector()
    if args.params == "egreedy":
        net = dqn_extra.BaselineDQN(env.observation_space.shape,
                                    env.action_space.n).to(device)
        selector = ptan.actions.EpsilonGreedyActionSelector(
            epsilon=params.epsilon_start)
        epsilon_tracker = common.EpsilonTracker(selector, params)
    elif args.params == "noisynet":
        net = dqn_extra.NoisyDQN(env.observation_space.shape,
                                 env.action_space.n).to(device)