parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") env = gym.make(params.env_name) env = drl.common.wrappers.wrap_dqn(env) env.seed(common.SEED) input_shape = env.observation_space.shape n_actions = env.action_space.n selector = dac.GreedySelector() net = dqn_extra.RainbowDQN(input_shape, n_actions).to(device) agent = dag.DQNAgent(net, selector, device) tgt_net = dag.TargetNet(net) buffer = dexp.PrioReplayBuffer(params.replay_size, PRIO_REPLAY_ALPHA, BETA_START) exp_source = dexp.ExperienceSource(env, agent, buffer, 1, params.gamma) beta_tracker = dexp.BetaTracker(buffer, BETA_START, BETA_FRAMES) writer = SummaryWriter(comment="-" + params.env_name) print(net) optimizer = optim.Adam(net.parameters(), lr=params.learning_rate) total_reward = [] frame_idx = 0 ts_frame = 0
# seed everything for reproducibility and load the Pong hyperparameters
random.seed(common.SEED)
torch.manual_seed(common.SEED)
params = common.HYPERPARAMS['pong']

parser = argparse.ArgumentParser()
parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda")
args = parser.parse_args()
device = torch.device("cuda" if args.cuda else "cpu")

# environment with the standard DQN Atari wrappers
env = gym.make(params.env_name)
env = ptan.common.wrappers.wrap_dqn(env)
env.seed(common.SEED)

# Rainbow network, its target-network copy, and a greedy (argmax) agent
net = dqn_extra.RainbowDQN(env.observation_space.shape, env.action_space.n).to(device)
tgt_net = ptan.agent.TargetNet(net)
selector = ptan.actions.ArgmaxActionSelector()
agent = ptan.agent.DQNAgent(net, selector, device=device)

# n-step experience source feeding a prioritized replay buffer
exp_source = ptan.experience.ExperienceSourceFirstLast(
    env, agent, gamma=params.gamma, steps_count=N_STEPS)
buffer = dqn_extra.PrioReplayBuffer(exp_source, params.replay_size, PRIO_REPLAY_ALPHA)
optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)


def process_batch(engine, batch_data):
    # a sampled batch comes with its buffer indices and importance-sampling weights
    batch, batch_indices, batch_weights = batch_data
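    # The handler is truncated at this point. A minimal sketch of how such a step
    # usually continues with prioritized replay follows; the calc_loss helper, its
    # returned per-sample priorities, and the buffer.update_priorities method are
    # assumptions for illustration, not taken from this excerpt.
    optimizer.zero_grad()
    loss_v, sample_prios = calc_loss(
        batch, batch_weights, net, tgt_net.target_model,
        gamma=params.gamma ** N_STEPS, device=device)
    loss_v.backward()
    optimizer.step()
    # feed the new per-sample priorities back into the prioritized buffer
    buffer.update_priorities(batch_indices, sample_prios)
    return {"loss": loss_v.item()}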