def make_policy_optimizer(workers, config): """Create the single process DQN policy optimizer. Returns: SyncReplayOptimizer: Used for generic off-policy Trainers. """ # SimpleQ does not use a PR buffer. kwargs = {"prioritized_replay": config.get("prioritized_replay", False)} kwargs.update(**config["optimizer"]) if "prioritized_replay" in config: kwargs.update({ "prioritized_replay_alpha": config["prioritized_replay_alpha"], "prioritized_replay_beta": config["prioritized_replay_beta"], "prioritized_replay_beta_annealing_timesteps": config[ "prioritized_replay_beta_annealing_timesteps"], "final_prioritized_replay_beta": config[ "final_prioritized_replay_beta"], "prioritized_replay_eps": config["prioritized_replay_eps"], }) return SyncReplayOptimizer( workers, # TODO(sven): Move all PR-beta decays into Schedule components. learning_starts=config["learning_starts"], buffer_size=config["buffer_size"], train_batch_size=config["train_batch_size"], **kwargs)
def make_optimizer(workers, config):
    """Create a SyncReplayOptimizer without prioritized replay.

    Runs `before_learn_on_batch` on each replayed batch and keeps worker
    sampling synchronized.
    """
    return SyncReplayOptimizer(
        workers,
        learning_starts=config["learning_starts"],
        buffer_size=config["buffer_size"],
        train_batch_size=config["train_batch_size"],
        before_learn_on_batch=before_learn_on_batch,
        synchronize_sampling=True,
        prioritized_replay=False)
def make_optimizer(workers, config):
    """Create the SyncReplayOptimizer used for off-policy training."""
    return SyncReplayOptimizer(
        workers,
        learning_starts=config["learning_starts"],
        buffer_size=config["buffer_size"],
        prioritized_replay=config["prioritized_replay"],
        prioritized_replay_alpha=config["prioritized_replay_alpha"],
        prioritized_replay_beta=config["prioritized_replay_beta"],
        schedule_max_timesteps=config["schedule_max_timesteps"],
        beta_annealing_fraction=config["beta_annealing_fraction"],
        final_prioritized_replay_beta=config["final_prioritized_replay_beta"],
        prioritized_replay_eps=config["prioritized_replay_eps"],
        train_batch_size=config["train_batch_size"],
        sample_batch_size=config["sample_batch_size"],
        **config["optimizer"])
def make_policy_optimizer(workers, config): """Create the single process DQN policy optimizer. Returns: SyncReplayOptimizer: Used for generic off-policy Trainers. """ return SyncReplayOptimizer( workers, # TODO(sven): Move all PR-beta decays into Schedule components. learning_starts=config["learning_starts"], buffer_size=config["buffer_size"], prioritized_replay=config["prioritized_replay"], prioritized_replay_alpha=config["prioritized_replay_alpha"], prioritized_replay_beta=config["prioritized_replay_beta"], prioritized_replay_beta_annealing_timesteps=config[ "prioritized_replay_beta_annealing_timesteps"], final_prioritized_replay_beta=config["final_prioritized_replay_beta"], prioritized_replay_eps=config["prioritized_replay_eps"], train_batch_size=config["train_batch_size"], **config["optimizer"])
def make_data_augmenting_policy_optimizer(workers, config):
    """Create a SyncReplayOptimizer that applies `before_learn_on_batch`
    (e.g. data augmentation) to each replayed batch before learning."""
    kwargs = {"prioritized_replay": config.get("prioritized_replay", False)}
    kwargs.update(**config["optimizer"])
    if "prioritized_replay" in config:
        kwargs.update({
            "prioritized_replay_alpha": config["prioritized_replay_alpha"],
            "prioritized_replay_beta": config["prioritized_replay_beta"],
            "prioritized_replay_beta_annealing_timesteps": config[
                "prioritized_replay_beta_annealing_timesteps"],
            "final_prioritized_replay_beta": config[
                "final_prioritized_replay_beta"],
            "prioritized_replay_eps": config["prioritized_replay_eps"],
        })
    return SyncReplayOptimizer(
        workers,
        learning_starts=config["learning_starts"],
        buffer_size=config["buffer_size"],
        train_batch_size=config["train_batch_size"],
        before_learn_on_batch=before_learn_on_batch,
        **kwargs)
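# Hedged usage sketch (not part of the library code above): shows the config
# keys the factories read and how the conditional-kwargs variants forward the
# prioritized-replay settings. The numeric values are illustrative
# placeholders, not the library's defaults.
example_config = {
    "optimizer": {},  # extra kwargs merged verbatim into the optimizer call
    "learning_starts": 1000,
    "buffer_size": 50000,
    "train_batch_size": 32,
    "prioritized_replay": True,
    "prioritized_replay_alpha": 0.6,
    "prioritized_replay_beta": 0.4,
    "prioritized_replay_beta_annealing_timesteps": 20000,
    "final_prioritized_replay_beta": 0.4,
    "prioritized_replay_eps": 1e-6,
}

# With a config like this, make_policy_optimizer and
# make_data_augmenting_policy_optimizer assemble
# kwargs = {"prioritized_replay": True, "prioritized_replay_alpha": 0.6, ...}
# and pass them to SyncReplayOptimizer alongside the buffer/batch settings;
# the non-kwargs variants read the same keys as explicit keyword arguments.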