Beispiel #1
0
def play_func(params, net, cuda, exp_queue):
    """Produce experience with an epsilon-greedy DQN agent and queue it.

    Args:
        params: settings object; ``env_name``, ``epsilon_start`` and
            ``gamma`` are read here, and the whole object is also handed
            to ``common.EpsilonTracker``.
        net: network passed to ``DQNAgent``.
        cuda: truthy to place the agent on ``"cuda"``, otherwise ``"cpu"``.
        exp_queue: object exposing ``put``; receives every item yielded by
            the experience source, plus one
            ``EpisodeEnded(reward, steps, epsilon)`` record for each pair
            returned by ``pop_rewards_steps()``.

    The function returns only when the experience source stops yielding.
    """
    # Environment: created from the configured name, wrapped, then seeded.
    env = wrap_dqn(make(params.env_name))
    env.seed(common.SEED)

    device_name = "cuda" if cuda else "cpu"
    device = torch.device(device_name)

    # The tracker receives the selector and is fed the frame count below.
    selector = EpsilonGreedyActionSelector(epsilon=params.epsilon_start)
    tracker = common.EpsilonTracker(selector, params)
    exp_source = ExperienceSourceFirstLast(
        env,
        DQNAgent(net, selector, device=device),
        gamma=params.gamma,
    )

    frames_seen = 0
    for transition in exp_source:
        # The frame counter is divided by BATCH_MUL before being reported.
        tracker.frame(frames_seen / BATCH_MUL)
        exp_queue.put(transition)
        for total_reward, n_steps in exp_source.pop_rewards_steps():
            exp_queue.put(
                EpisodeEnded(total_reward, n_steps, selector.epsilon))
        frames_seen += 1
Beispiel #2
0
    # Settings stored under the "pong" key of common.HYPERPARAMS; the
    # attributes env_name, epsilon_start, gamma, replay_size and
    # learning_rate are read from it below.
    params = common.HYPERPARAMS["pong"]

    # Command-line options: a CUDA switch and the n-step unroll length.
    parser = ArgumentParser()
    # NOTE(review): with action="store_true" and default=True, args.cuda is
    # True whether or not --cuda is passed, so the "cpu" branch below is
    # never selected -- confirm whether default=False was intended.
    parser.add_argument("--cuda",
                        default=True,
                        action="store_true",
                        help="Enable cuda")
    parser.add_argument("-n",
                        type=int,
                        default=DEFAULT_N_STEPS,
                        help="steps to do on Bellman unroll")
    args = parser.parse_args()
    # NOTE(review): this rebinds the name `device` from the callable to its
    # result. If these statements live inside a function body, `device` is
    # local and the call raises UnboundLocalError -- the enclosing scope is
    # not visible here, confirm.
    device = device("cuda" if args.cuda else "cpu")

    # Environment: created from the configured name, wrapped by wrap_dqn and
    # seeded with the fixed value 123.
    env = make(params.env_name)
    env = wrap_dqn(env)
    env.seed(123)
    # Network built from the observation shape and the number of actions,
    # then moved to the selected device.
    net = dqn_model.DQN(env.observation_space.shape,
                        env.action_space.n).to(device)
    # TargetNet wraps `net` (presumably a separately synced copy used for
    # target values -- TODO confirm against the TargetNet implementation).
    tgt_net = TargetNet(net)

    # Epsilon-greedy action selection starting at params.epsilon_start; the
    # tracker is given the selector and params but is not called within the
    # lines shown here.
    selector = EpsilonGreedyActionSelector(epsilon=params.epsilon_start)
    epsilon_tracker = common.EpsilonTracker(selector, params)
    agent = DQNAgent(net, selector, device=device)
    # Experience source configured with the discount factor and the number
    # of steps taken from the -n option.
    exp_source = ExperienceSourceFirstLast(env,
                                           agent,
                                           gamma=params.gamma,
                                           steps_count=args.n)
    # Replay buffer fed from the experience source, sized by
    # params.replay_size.
    buffer = ExperienceReplayBuffer(exp_source, buffer_size=params.replay_size)
    # Adam optimizer over the parameters of `net` only.
    optimizer = Adam(net.parameters(), lr=params.learning_rate)
# Script entry point: sets up an n-step DQN run using the NoisyDQN network.
# Only the setup portion is visible here.
if __name__ == "__main__":
    # Single option: -n, the number of steps to unroll (default 1).
    parser = argparse.ArgumentParser(description='n-step noisy DQN')
    parser.add_argument('-n',
                        default=1,
                        type=int,
                        help='Enter the number of steps to unroll bellman eq')
    args = parser.parse_args()

    print('Starting...')
    # Settings stored under the 'pong' key; accessed by string key below
    # ('run_name', 'env_name', 'gamma', 'replay_size', 'learning_rate').
    params = HYPERPARAMS['pong']
    # Use CUDA whenever torch reports it as available, otherwise the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Running on Device {}'.format(device))
    # The writer comment combines the run name with the chosen step count.
    # NOTE(review): the chained `writer = writer = ...` assigns the same name
    # twice; the second target is redundant.
    writer = writer = SummaryWriter(comment="-" + params['run_name'] +
                                    "-%d-step noisy-net" % args.n)
    # Environment created through gym and wrapped by wrappers.wrap_dqn.
    env = gym.make(params['env_name'])
    env = wrappers.wrap_dqn(env)
    # print(env.observation_space.shape, env.action_space.n)
    # Network built from the observation shape and the number of actions,
    # then moved to the selected device.
    net = NoisyDQN(env.observation_space.shape, env.action_space.n).to(device)
    # TargetNet wraps `net` (presumably a separately synced copy used for
    # target values -- TODO confirm against the TargetNet implementation).
    target_net = TargetNet(net)

    # The agent uses ArgmaxActionSelector; no epsilon selector or tracker is
    # created in this block.
    agent = DQNAgent(net, ArgmaxActionSelector(), device)

    # Experience source configured with the discount factor (passed
    # positionally) and the step count from the -n option.
    experience_source = ExperienceSourceFirstLast(env,
                                                  agent,
                                                  params['gamma'],
                                                  steps_count=args.n)
    # Replay buffer fed from the experience source, sized by
    # params['replay_size'].
    buffer = ExperienceReplayBuffer(experience_source,
                                    buffer_size=params['replay_size'])

    # Adam optimizer over the parameters of `net` only.
    optimizer = optim.Adam(net.parameters(), lr=params['learning_rate'])
    # Frame counter starts at zero; it is not used within the lines shown
    # here (presumably advanced by a loop that follows -- TODO confirm).
    frame_idx = 0