parser.add_argument("--cuda",
                        default=False,
                        action="store_true",
                        help="Enable cuda")
    args = parser.parse_args()
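    # Run on the GPU when --cuda is passed, otherwise fall back to the CPU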
    device = torch.device("cuda" if args.cuda else "cpu")

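    # Create the Atari environment and apply the usual DQN preprocessing
    # wrappers (frame skipping, rescaling, frame stacking and the like)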
    env = gym.make(params.env_name)
    env = drl.common.wrappers.wrap_dqn(env)
    env.seed(common.SEED)
    input_shape = env.observation_space.shape
    n_actions = env.action_space.n

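    # Greedy action selection: Rainbow explores through its noisy linear
    # layers, so no epsilon-greedy schedule is needed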
    selector = dac.GreedySelector()

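    # Rainbow network plus a target network that provides stable Q-value
    # targets during training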
    net = dqn_extra.RainbowDQN(input_shape, n_actions).to(device)
    agent = dag.DQNAgent(net, selector, device)
    tgt_net = dag.TargetNet(net)

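    # Prioritized replay: alpha shapes how strongly priorities skew sampling,
    # while the tracker anneals beta (the importance-sampling correction)
    # from BETA_START towards 1.0 over BETA_FRAMES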
    buffer = dexp.PrioReplayBuffer(params.replay_size, PRIO_REPLAY_ALPHA,
                                   BETA_START)
    exp_source = dexp.ExperienceSource(env, agent, buffer, 1, params.gamma)
    beta_tracker = dexp.BetaTracker(buffer, BETA_START, BETA_FRAMES)

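    # TensorBoard writer tagged with the environment name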
    writer = SummaryWriter(comment="-" + params.env_name)
    print(net)

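    # Adam optimizer plus reward/frame counters for the training loop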
    optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)
    total_reward = []
    frame_idx = 0
    ts_frame = 0
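The listing stops right after the counters, so the training loop itself is not shown. Below is a minimal sketch of the loop such a setup typically drives; the buffer/tracker methods (populate, sample, update_priorities, frame), the calc_loss helper, and the params fields batch_size, replay_initial and target_net_sync are assumptions modeled on ptan-style code, not part of the original:

    while True:
        frame_idx += 1
        buffer.populate(1)              # play one env step and store it (assumed API)
        beta_tracker.frame(frame_idx)   # anneal beta towards 1.0 (assumed API)
        if len(buffer) < params.replay_initial:
            continue  # wait until the buffer holds enough transitions
        optimizer.zero_grad()
        batch, batch_indices, batch_weights = buffer.sample(params.batch_size)
        # calc_loss (hypothetical) returns the weighted loss and new per-sample priorities
        loss_v, sample_prios = calc_loss(batch, batch_weights, net,
                                         tgt_net.target_model, params.gamma,
                                         device=device)
        loss_v.backward()
        optimizer.step()
        buffer.update_priorities(batch_indices, sample_prios)
        if frame_idx % params.target_net_sync == 0:
            tgt_net.sync()              # copy weights into the target network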
Example #2
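    # Fix the random seeds so the run is reproducible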
    random.seed(common.SEED)
    torch.manual_seed(common.SEED)
    params = common.HYPERPARAMS['pong']
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda",
                        default=False,
                        action="store_true",
                        help="Enable cuda")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

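    # Same environment setup as in the previous example, using ptan's wrappers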
    env = gym.make(params.env_name)
    env = ptan.common.wrappers.wrap_dqn(env)
    env.seed(common.SEED)

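    # Rainbow network sized from the wrapped observation shape and action count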
    net = dqn_extra.RainbowDQN(env.observation_space.shape,
                               env.action_space.n).to(device)

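    # Target network for stable targets; ArgmaxActionSelector is ptan's greedy
    # selector (Rainbow's noisy layers take care of exploration)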
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.ArgmaxActionSelector()
    agent = ptan.agent.DQNAgent(net, selector, device=device)

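    # ExperienceSourceFirstLast folds every N_STEPS-step sub-trajectory into a
    # single (first state, action, discounted n-step reward, last state) sample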
    exp_source = ptan.experience.ExperienceSourceFirstLast(env,
                                                           agent,
                                                           gamma=params.gamma,
                                                           steps_count=N_STEPS)
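    # Prioritized replay buffer fed from the experience source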
    buffer = dqn_extra.PrioReplayBuffer(exp_source, params.replay_size,
                                        PRIO_REPLAY_ALPHA)
    optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)

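    # Per-batch processing function; the (engine, batch) signature matches
    # what ignite's Engine expects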
    def process_batch(engine, batch_data):
        batch, batch_indices, batch_weights = batch_data
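        # --- Sketch, not from the original listing: the rest of a typical
        # prioritized-replay update. calc_loss is a hypothetical helper that
        # returns the IS-weighted loss and new per-sample priorities; the
        # buffer.update_priorities call mirrors the usual prioritized-replay
        # API and is an assumption here. gamma is raised to N_STEPS because
        # the experience source already discounts rewards inside each n-step
        # transition.
        optimizer.zero_grad()
        loss_v, sample_prios = calc_loss(batch, batch_weights, net,
                                         tgt_net.target_model,
                                         gamma=params.gamma ** N_STEPS,
                                         device=device)
        loss_v.backward()
        optimizer.step()
        buffer.update_priorities(batch_indices, sample_prios)
        return {"loss": loss_v.item()}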