def play_func(params, net, cuda, exp_queue):
    """Actor-process entry point: run the environment and feed a queue.

    Builds a wrapped Atari environment and an epsilon-greedy DQN agent
    around the shared `net`, then streams first-last experience tuples
    into `exp_queue`.  Whenever an episode finishes, an `EpisodeEnded`
    record (reward, steps, current epsilon) is pushed as well so the
    training process can track progress.

    Args:
        params: hyperparameter bundle (env_name, epsilon_start, gamma, ...).
        net: DQN network shared with the training process.
        cuda: if True, run the agent's forward passes on the GPU.
        exp_queue: multiprocessing queue consumed by the trainer.
    """
    # Environment: no-op/max-skip handled by make_atari, DeepMind-style
    # preprocessing with a stack of 2 consecutive frames.
    environment = atari_wrappers.make_atari(params.env_name, skip_noop=True, skip_maxskip=True)
    environment = atari_wrappers.wrap_deepmind(environment, pytorch_img=True, frame_stack=True, frame_stack_count=2)
    environment.seed(common.SEED)

    device = torch.device("cuda" if cuda else "cpu")

    # Epsilon-greedy exploration, annealed per (scaled) frame index.
    action_selector = ptan.actions.EpsilonGreedyActionSelector(
        epsilon=params.epsilon_start)
    eps_tracker = common.EpsilonTracker(action_selector, params)

    dqn_agent = ptan.agent.DQNAgent(net, action_selector, device=device)
    source = ptan.experience.ExperienceSourceFirstLast(
        environment, dqn_agent, gamma=params.gamma)

    for step_idx, experience in enumerate(source):
        # Scale the frame counter by BATCH_MUL so the epsilon schedule
        # matches the trainer's effective batch consumption rate.
        eps_tracker.frame(step_idx / BATCH_MUL)
        exp_queue.put(experience)
        # Report any episodes that completed during this step.
        for ep_reward, ep_steps in source.pop_rewards_steps():
            exp_queue.put(EpisodeEnded(ep_reward, ep_steps, action_selector.epsilon))
"--params", default="egreedy",
    choices=list(HYPERPARAMS.keys()),
    help="Parameters, default=egreedy",
)
args = parser.parse_args()
params = HYPERPARAMS[args.params]
device = torch.device("cuda" if args.cuda else "cpu")

# Build N_ENVS identical Atari environments with DeepMind-style wrapping.
# NOTE(review): play_func wraps its env with frame_stack_count=2, while these
# envs use wrap_deepmind's default stacking — confirm the observation shapes
# agree, since the nets below are sized from env.observation_space.shape.
envs = []
for _ in range(N_ENVS):
    env = atari_wrappers.make_atari(params.env_name, skip_noop=True, skip_maxskip=True)
    env = atari_wrappers.wrap_deepmind(env, pytorch_img=True, frame_stack=True)
    envs.append(env)

# Default selector is greedy (argmax); only the egreedy variant replaces it
# with an epsilon-greedy selector plus an annealing tracker.
epsilon_tracker = None
selector = ptan.actions.ArgmaxActionSelector()
# Network/exploration choice keyed on the hyperparameter set name.
# (env here is the last environment from the loop above; all are identical.)
if args.params == "egreedy":
    net = dqn_extra.BaselineDQN(env.observation_space.shape, env.action_space.n).to(device)
    selector = ptan.actions.EpsilonGreedyActionSelector(
        epsilon=params.epsilon_start)
    epsilon_tracker = common.EpsilonTracker(selector, params)
elif args.params == "noisynet":
    # NoisyNet explores via parameter noise, so the greedy selector is kept.
    net = dqn_extra.NoisyDQN(env.observation_space.shape, env.action_space.n).to(device)