def make_env(params):
    """Create the wrapped Atari environment described by ``params``.

    ``params`` is indexed by key: ``params['env_name']`` is handed to
    ``atari_wrappers.make_atari`` and ``params['fsa']`` is forwarded to both
    ``make_atari`` and ``wrap_deepmind``.

    Returns the object produced by ``atari_wrappers.wrap_deepmind`` called
    with ``frame_stack=True`` and ``pytorch_img=True``.
    """
    env_name = params['env_name']
    fsa = params['fsa']
    raw_env = atari_wrappers.make_atari(env_name, fsa=fsa)
    return atari_wrappers.wrap_deepmind(
        raw_env,
        frame_stack=True,
        pytorch_img=True,
        fsa=fsa,
    )
def play_func(params, net, cuda, exp_queue):
    """Generate experience from the environment and push it to ``exp_queue``.

    Args:
        params: Hyperparameter object; this function reads ``env_name``,
            ``epsilon_start`` and ``gamma`` from it and also passes the whole
            object to ``common.EpsilonTracker``.
        net: Model handed to ``ptan.agent.DQNAgent``.
        cuda: Truthy to run the agent on the ``"cuda"`` device, otherwise
            ``"cpu"`` is used.
        exp_queue: Queue-like object; every experience item is ``put`` on it,
            followed by one ``EpisodeEnded`` record per finished episode.

    The function returns only when the experience source stops yielding.
    """
    # Build and seed the environment used for acting.
    raw_env = atari_wrappers.make_atari(
        params.env_name, skip_noop=True, skip_maxskip=True)
    env = atari_wrappers.wrap_deepmind(
        raw_env, pytorch_img=True, frame_stack=True, frame_stack_count=2)
    env.seed(common.SEED)

    device_name = "cuda" if cuda else "cpu"
    device = torch.device(device_name)

    # Epsilon-greedy agent; the tracker updates the selector as frames pass.
    action_selector = ptan.actions.EpsilonGreedyActionSelector(
        epsilon=params.epsilon_start)
    eps_tracker = common.EpsilonTracker(action_selector, params)
    dqn_agent = ptan.agent.DQNAgent(net, action_selector, device=device)
    source = ptan.experience.ExperienceSourceFirstLast(
        env, dqn_agent, gamma=params.gamma)

    for step_no, transition in enumerate(source):
        # NOTE(review): the step counter is divided by BATCH_MUL before it is
        # given to the tracker -- presumably to keep the epsilon schedule in
        # units used by the consumer; confirm against EpsilonTracker.
        eps_tracker.frame(step_no / BATCH_MUL)
        exp_queue.put(transition)

        # Report every episode that finished since the previous step.
        for reward, steps in source.pop_rewards_steps():
            finished = EpisodeEnded(reward, steps, action_selector.epsilon)
            exp_queue.put(finished)
# Trainer-side setup: pick the start method, parse CLI flags, build the model
# and its helpers, then launch the experience-producing subprocess.
# NOTE(review): with the 'spawn' start method the child process re-imports the
# main module -- confirm these statements sit under an
# `if __name__ == "__main__":` guard outside the visible range.
mp.set_start_method('spawn')
params = common.HYPERPARAMS['pong']

parser = argparse.ArgumentParser()
parser.add_argument(
    "--cuda",
    action="store_true",
    default=False,
    help="Enable cuda",
)
args = parser.parse_args()
device = torch.device("cuda" if args.cuda else "cpu")

# In this section the environment is only queried for its observation shape
# and number of actions, which size the network.
env = atari_wrappers.wrap_deepmind(
    atari_wrappers.make_atari(
        params.env_name, skip_noop=True, skip_maxskip=True),
    pytorch_img=True,
    frame_stack=True,
    frame_stack_count=2,
)

net = dqn_model.DQN(
    env.observation_space.shape, env.action_space.n).to(device)
tgt_net = ptan.agent.TargetNet(net)
optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)

# No experience source is attached to the buffer here (experience_source=None).
buffer = ptan.experience.ExperienceReplayBuffer(
    experience_source=None, buffer_size=params.replay_size)

# start subprocess and experience queue
exp_queue = mp.Queue(maxsize=2 * BATCH_MUL)
play_proc = mp.Process(
    target=play_func,
    args=(params, net, args.cuda, exp_queue),
)
play_proc.start()
help="Enable cuda") parser.add_argument("-n", "--name", required=True, help="Run name") parser.add_argument("-p", "--params", default='ppo', choices=list(HYPERPARAMS.keys()), help="Parameters, default=ppo") args = parser.parse_args() params = HYPERPARAMS[args.params] device = torch.device("cuda" if args.cuda else "cpu") test_env = atari_wrappers.make_atari(params.env_name, skip_noop=True, skip_maxskip=True) test_env = atari_wrappers.wrap_deepmind(test_env, pytorch_img=True, frame_stack=True) do_distill = False dist_ref = dist_trn = None if args.params == 'noisynet': net = ppo.AtariNoisyNetsPPO(test_env.observation_space.shape, test_env.action_space.n).to(device) elif args.params == 'distill': net = ppo.AtariDistillPPO(test_env.observation_space.shape, test_env.action_space.n).to(device) do_distill = True dist_ref = ppo.AtariDistill( test_env.observation_space.shape).to(device) dist_ref.train(False)