Code example #1
    # expose the parsed environment parameters to the rest of ptan
    env_params.register(params)

    # Q-network: a single input channel for grayscale frames, three for RGB
    model = Net(params.n_actions,
                input_shape=(1 if grayscale else 3, im_height, im_width))
    if params.cuda_enabled:
        model.cuda()

    # sum squared errors over the batch; size_average=False is the
    # deprecated spelling of reduction='sum'
    loss_fn = nn.MSELoss(reduction='sum')
    optimizer = optim.Adam(model.parameters(),
                           lr=run.getfloat("learning", "lr"))

    action_selector = ActionSelectorEpsilonGreedy(
        epsilon=run.getfloat("defaults", "epsilon"),
        params=params)
    # TargetNet keeps a periodically synced frozen copy of the Q-network
    target_net = agent.TargetNet(model)
    dqn_agent = agent.DQNAgent(dqn_model=model,
                               action_selector=action_selector)
    # transitions stream from the environment pool into the replay buffer
    exp_source = experience.ExperienceSource(
        env=env_pool, agent=dqn_agent,
        steps_count=run.getint("defaults", "n_steps"))
    exp_replay = experience.ExperienceReplayBuffer(
        exp_source, buffer_size=run.getint("exp_buffer", "size"))

    use_target_dqn = run.getboolean("dqn", "target_dqn", fallback=False)
    use_double_dqn = run.getboolean("dqn", "double_dqn", fallback=False)

    # bootstrap next-state values from the frozen copy when a target
    # network is enabled, otherwise from the online network itself
    if use_target_dqn:
        target_model = target_net.target_model
    else:
        target_model = model
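
Neither snippet shows where `use_double_dqn` and `target_model` are consumed, so here is a minimal sketch of the DQN loss they typically feed. The batch tensors (`states`, `actions`, `rewards`, `next_states`, `dones`) and the discount `gamma` are assumed names for illustration, not identifiers from the original source:

import torch

def calc_loss(states, actions, rewards, next_states, dones, gamma=0.99):
    # Q(s, a) for the actions that were actually taken
    q_values = model(states).gather(1, actions.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        if use_double_dqn:
            # double DQN: the online net picks the argmax action,
            # the target net evaluates it
            next_actions = model(next_states).argmax(dim=1)
            next_q = target_model(next_states).gather(
                1, next_actions.unsqueeze(-1)).squeeze(-1)
        else:
            # vanilla DQN: max over the target net's Q-values
            next_q = target_model(next_states).max(dim=1)[0]
        next_q[dones] = 0.0  # terminal states have no future return
        target = rewards + gamma * next_q
    return loss_fn(q_values, target)

In the double-DQN branch the action choice and the value estimate come from different networks, which damps the overestimation bias of the plain max.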
Code example #2
File: dqn_tweaks_atari.py  Project: zivzone/ptan
    # Q-network sized for the frame stack; the dueling flag switches on
    # the dueling value/advantage head
    model = Net(env_pool[0].action_space.n,
                input_shape=(frames_count if grayscale else 3 * frames_count,
                             im_height, im_width),
                dueling=run.getboolean("dqn", "dueling"))
    if cuda_enabled:
        model.cuda()

    # WeightedMSELoss accepts per-sample weights, as needed for prioritized replay
    loss_fn = utils.WeightedMSELoss(size_average=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=run.getfloat("learning", "lr"))

    action_selector = ptan.actions.EpsilonGreedyActionSelector(
        epsilon=run.getfloat("defaults", "epsilon"))
    target_net = agent.TargetNet(model)
    dqn_agent = agent.DQNAgent(dqn_model=model,
                               action_selector=action_selector,
                               cuda=cuda_enabled)
    exp_source = experience.ExperienceSource(
        env=env_pool, agent=dqn_agent,
        steps_count=run.getint("defaults", "n_steps"))
    exp_replay = experience.ExperienceReplayBuffer(
        exp_source, buffer_size=run.getint("exp_buffer", "size"))
    # alternative: prioritized replay (left commented out in the source)
    # exp_replay = experience.PrioritizedReplayBuffer(
    #     exp_source, buffer_size=run.getint("exp_buffer", "size"),
    #     prob_alpha=run.getfloat("exp_buffer", "prio_alpha"),
    #     weight_beta=run.getfloat("exp_buffer", "prio_beta"))

    use_target_dqn = run.getboolean("dqn", "target_dqn", fallback=False)
    use_double_dqn = run.getboolean("dqn", "double_dqn", fallback=False)
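
To show where these pieces plug in, here is a sketch of the training loop, assuming ptan's replay-buffer and target-net API (populate, sample, sync). The hyperparameter values, the step budget, and the way the sampled batch reaches the loss are assumptions, not code from dqn_tweaks_atari.py:

batch_size = 32       # assumed value; a real run would read it from the config
sync_interval = 1000  # assumed target-network sync period, in agent steps

for step in range(1_000_000):
    exp_replay.populate(1)            # play one step in the env pool, store the transition
    if len(exp_replay) < batch_size:
        continue                      # fill the buffer before learning starts
    batch = exp_replay.sample(batch_size)
    optimizer.zero_grad()
    loss = calc_loss(batch)           # e.g. the double-DQN loss sketched above,
                                      # after converting the batch to tensors
    loss.backward()
    optimizer.step()
    if use_target_dqn and step % sync_interval == 0:
        target_net.sync()             # copy online weights into the frozen copy

The commented-out PrioritizedReplayBuffer would slot in at the same point, additionally feeding its importance weights to WeightedMSELoss.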