Example 1
from ray.rllib.optimizers import SyncReplayOptimizer  # Ray <= 0.8.x path


def make_policy_optimizer(workers, config):
    """Create the single process DQN policy optimizer.

    Returns:
        SyncReplayOptimizer: Used for generic off-policy Trainers.
    """
    # SimpleQ does not use a prioritized replay (PR) buffer.
    kwargs = {"prioritized_replay": config.get("prioritized_replay", False)}
    kwargs.update(**config["optimizer"])
    if "prioritized_replay" in config:
        kwargs.update({
            "prioritized_replay_alpha": config["prioritized_replay_alpha"],
            "prioritized_replay_beta": config["prioritized_replay_beta"],
            "prioritized_replay_beta_annealing_timesteps": config[
                "prioritized_replay_beta_annealing_timesteps"],
            "final_prioritized_replay_beta": config[
                "final_prioritized_replay_beta"],
            "prioritized_replay_eps": config["prioritized_replay_eps"],
        })

    return SyncReplayOptimizer(
        workers,
        # TODO(sven): Move all PR-beta decays into Schedule components.
        learning_starts=config["learning_starts"],
        buffer_size=config["buffer_size"],
        train_batch_size=config["train_batch_size"],
        **kwargs)
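For context, factories like this were plugged into RLlib's trainer template through its make_policy_optimizer hook. A minimal sketch of that wiring, assuming the Ray ~0.8.x API (build_trainer's make_policy_optimizer argument and the ray.rllib.optimizers package were removed in later releases, and the exact policy import path varies by version):

from ray.rllib.agents.dqn.dqn import DEFAULT_CONFIG
from ray.rllib.agents.dqn.dqn_tf_policy import DQNTFPolicy  # ~0.8.5 path
from ray.rllib.agents.trainer_template import build_trainer

# The template calls make_policy_optimizer(workers, config) once at setup
# time, then drives the returned optimizer's step() inside train().
MyDQNTrainer = build_trainer(
    name="MyDQN",
    default_policy=DQNTFPolicy,
    default_config=DEFAULT_CONFIG,
    make_policy_optimizer=make_policy_optimizer)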
Example 2
File: maddpg.py, Project: wsjeon/ray
def make_optimizer(workers, config):
    return SyncReplayOptimizer(workers,
                               learning_starts=config["learning_starts"],
                               buffer_size=config["buffer_size"],
                               train_batch_size=config["train_batch_size"],
                               before_learn_on_batch=before_learn_on_batch,
                               # Sample the same buffer indices for every
                               # policy, keeping the per-agent rows aligned.
                               synchronize_sampling=True,
                               prioritized_replay=False)
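The before_learn_on_batch hook passed above is defined elsewhere in maddpg.py; MADDPG uses it to augment each replayed batch with the other agents' newly sampled actions before the centralized critic update. A minimal no-op stub, assuming the three-argument signature SyncReplayOptimizer invokes it with (the real hook in wsjeon/ray does actual work):

def before_learn_on_batch(multi_agent_batch, policies, train_batch_size):
    # Called on every replayed batch just before learn_on_batch().
    # `policies` is the local worker's policy map, keyed by policy id.
    return multi_agent_batch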
Example 3
def make_optimizer(workers, config):
    return SyncReplayOptimizer(
        workers,
        learning_starts=config["learning_starts"],
        buffer_size=config["buffer_size"],
        prioritized_replay=config["prioritized_replay"],
        prioritized_replay_alpha=config["prioritized_replay_alpha"],
        prioritized_replay_beta=config["prioritized_replay_beta"],
        schedule_max_timesteps=config["schedule_max_timesteps"],
        beta_annealing_fraction=config["beta_annealing_fraction"],
        final_prioritized_replay_beta=config["final_prioritized_replay_beta"],
        prioritized_replay_eps=config["prioritized_replay_eps"],
        train_batch_size=config["train_batch_size"],
        sample_batch_size=config["sample_batch_size"],
        **config["optimizer"])
Example 4
def make_policy_optimizer(workers, config):
    """Create the single process DQN policy optimizer.

    Returns:
        SyncReplayOptimizer: Used for generic off-policy Trainers.
    """
    return SyncReplayOptimizer(
        workers,
        # TODO(sven): Move all PR-beta decays into Schedule components.
        learning_starts=config["learning_starts"],
        buffer_size=config["buffer_size"],
        prioritized_replay=config["prioritized_replay"],
        prioritized_replay_alpha=config["prioritized_replay_alpha"],
        prioritized_replay_beta=config["prioritized_replay_beta"],
        prioritized_replay_beta_annealing_timesteps=config[
            "prioritized_replay_beta_annealing_timesteps"],
        final_prioritized_replay_beta=config["final_prioritized_replay_beta"],
        prioritized_replay_eps=config["prioritized_replay_eps"],
        train_batch_size=config["train_batch_size"],
        **config["optimizer"])


def make_data_augmenting_policy_optimizer(workers, config):
    kwargs = {"prioritized_replay": config.get("prioritized_replay", False)}
    kwargs.update(**config["optimizer"])
    if "prioritized_replay" in config:
        kwargs.update({
            "prioritized_replay_alpha": config["prioritized_replay_alpha"],
            "prioritized_replay_beta": config["prioritized_replay_beta"],
            "prioritized_replay_beta_annealing_timesteps": config[
                "prioritized_replay_beta_annealing_timesteps"],
            "final_prioritized_replay_beta": config[
                "final_prioritized_replay_beta"],
            "prioritized_replay_eps": config["prioritized_replay_eps"],
        })
    return SyncReplayOptimizer(workers,
                               learning_starts=config["learning_starts"],
                               buffer_size=config["buffer_size"],
                               train_batch_size=config["train_batch_size"],
                               before_learn_on_batch=before_learn_on_batch,
                               **kwargs)
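However it is constructed, the returned SyncReplayOptimizer is driven the same way: the trainer repeatedly calls step(), which samples from the rollout workers, stores the samples in the replay buffer, and, once learning_starts timesteps have been collected, replays a batch and learns on it. A rough driving loop, assuming `workers` and `config` as above and that the PolicyOptimizer step/counter API of Ray ~0.8.x applies:

optimizer = make_data_augmenting_policy_optimizer(workers, config)
for _ in range(1000):
    optimizer.step()  # sample from workers -> store -> replay -> learn
# PolicyOptimizer keeps counters that trainers report as metrics.
print(optimizer.num_steps_sampled, optimizer.num_steps_trained)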