def default_replay():
    return replays.Buffer(
        size=int(1e6), batch_iterations=50, batch_size=100,
        discount_factor=0.98, steps_before_batches=5000,
        steps_between_batches=50)
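Read together, these defaults mean training only begins after 5000 environment steps, after which 50 batches of 100 transitions are sampled every 50 steps, i.e. one batch per environment step on average. A quick check using only the numbers above:

# Implied update rate of the default replay settings above.
batch_iterations = 50        # batches sampled per training phase
steps_between_batches = 50   # environment steps between training phases
updates_per_env_step = batch_iterations / steps_between_batches
assert updates_per_env_step == 1.0  # one batch of 100 transitions per step after warm-up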
def __init__(
    self, model=None, replay=None, actor_updater=None, critic_updater=None
):
    self.model = model or default_model()
    self.replay = replay or replays.Buffer(return_steps=5)
    self.actor_updater = actor_updater or \
        updaters.MaximumAPosterioriPolicyOptimization()
    self.critic_updater = critic_updater or updaters.ExpectedSARSA()
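The replay here requests 5-step returns (return_steps=5). As a minimal illustration of what a multi-step return computes, not the library's internal code, using the 0.98 discount from the replay defaults above:

# Illustrative n-step return: discounted rewards plus a bootstrapped tail value.
def n_step_return(rewards, bootstrap_value, discount=0.98):
    ret = sum(discount ** k * r for k, r in enumerate(rewards))
    return ret + discount ** len(rewards) * bootstrap_value

# Example: a 5-step return with unit rewards and a critic estimate of 10.
print(n_step_return([1.0] * 5, bootstrap_value=10.0))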
def __init__(
    self, model=None, replay=None, exploration=None,
    actor_updater=None, critic_updater=None
):
    self.model = model or default_model()
    self.replay = replay or replays.Buffer()
    self.exploration = exploration or explorations.NormalActionNoise()
    self.actor_updater = actor_updater or \
        updaters.DeterministicPolicyGradient()
    self.critic_updater = critic_updater or \
        updaters.DeterministicQLearning()
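Exploration here adds Gaussian noise to the actions of a deterministic policy. A minimal sketch of that idea follows; the noise scale and action bounds are illustrative assumptions, not the defaults of the library's NormalActionNoise class:

import numpy as np

# Illustrative additive Gaussian exploration, not the library's implementation.
def noisy_action(action, scale=0.1, low=-1.0, high=1.0):
    """Perturb a deterministic action with Normal(0, scale) noise, clipped to bounds."""
    noise = np.random.normal(0.0, scale, size=np.shape(action))
    return np.clip(action + noise, low, high)

print(noisy_action(np.array([0.5, -0.5])))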
def __init__(
    self, model=None, replay=None, exploration=None,
    actor_updater=None, critic_updater=None
):
    model = model or default_model()
    replay = replay or replays.Buffer(return_steps=5)
    actor_updater = actor_updater or \
        updaters.DistributionalDeterministicPolicyGradient()
    critic_updater = critic_updater or \
        updaters.DistributionalDeterministicQLearning()
    super().__init__(
        model, replay, exploration, actor_updater, critic_updater)
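All three constructors follow the same pattern: every component argument defaults to None and is replaced by a library default with `x = x or default()`, so callers override only the pieces they care about. A self-contained sketch of that pattern with stand-in classes (illustrative names, not the library's actual agents):

class Buffer:
    """Stand-in replay buffer used only to illustrate default construction."""
    def __init__(self, size=int(1e6), return_steps=1):
        self.size, self.return_steps = size, return_steps


class Agent:
    """Each component falls back to a default unless the caller provides one."""
    def __init__(self, replay=None):
        self.replay = replay or Buffer()


default_agent = Agent()                              # replay: Buffer with default settings
custom_agent = Agent(replay=Buffer(return_steps=5))  # override only the replay component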