Esempio n. 1
0
def default_replay():
    """Create a ``replays.Buffer`` configured with this module's defaults.

    Returns:
        A new ``replays.Buffer`` built from the keyword settings listed below.
    """
    # Settings are gathered in one place so they are easy to scan and tweak.
    buffer_settings = {
        "size": 1_000_000,
        "batch_iterations": 50,
        "batch_size": 100,
        "discount_factor": 0.98,
        "steps_before_batches": 5000,
        "steps_between_batches": 50,
    }
    return replays.Buffer(**buffer_settings)
Esempio n. 2
0
 def __init__(
     self, model=None, replay=None, actor_updater=None, critic_updater=None
 ):
     """Store the agent's components, building a default for each omitted one.

     Args:
         model: Model to use; ``default_model()`` is called when None.
         replay: Replay buffer; ``replays.Buffer(return_steps=5)`` is built
             when None.
         actor_updater: Actor updater; defaults to
             ``updaters.MaximumAPosterioriPolicyOptimization()`` when None.
         critic_updater: Critic updater; defaults to
             ``updaters.ExpectedSARSA()`` when None.
     """
     # Compare against None explicitly rather than relying on truthiness:
     # a caller-supplied object that happens to be falsy (e.g. one whose
     # __len__ is 0) must not be silently swapped for the default.
     if model is None:
         model = default_model()
     if replay is None:
         replay = replays.Buffer(return_steps=5)
     if actor_updater is None:
         actor_updater = updaters.MaximumAPosterioriPolicyOptimization()
     if critic_updater is None:
         critic_updater = updaters.ExpectedSARSA()

     self.model = model
     self.replay = replay
     self.actor_updater = actor_updater
     self.critic_updater = critic_updater
Esempio n. 3
0
 def __init__(
     self, model=None, replay=None, exploration=None, actor_updater=None,
     critic_updater=None
 ):
     """Store the agent's components, building a default for each omitted one.

     Args:
         model: Model to use; ``default_model()`` is called when None.
         replay: Replay buffer; ``replays.Buffer()`` is built when None.
         exploration: Exploration strategy; defaults to
             ``explorations.NormalActionNoise()`` when None.
         actor_updater: Actor updater; defaults to
             ``updaters.DeterministicPolicyGradient()`` when None.
         critic_updater: Critic updater; defaults to
             ``updaters.DeterministicQLearning()`` when None.
     """
     # Compare against None explicitly rather than relying on truthiness:
     # a caller-supplied object that happens to be falsy (e.g. one whose
     # __len__ is 0) must not be silently swapped for the default.
     if model is None:
         model = default_model()
     if replay is None:
         replay = replays.Buffer()
     if exploration is None:
         exploration = explorations.NormalActionNoise()
     if actor_updater is None:
         actor_updater = updaters.DeterministicPolicyGradient()
     if critic_updater is None:
         critic_updater = updaters.DeterministicQLearning()

     self.model = model
     self.replay = replay
     self.exploration = exploration
     self.actor_updater = actor_updater
     self.critic_updater = critic_updater
Esempio n. 4
0
 def __init__(
     self, model=None, replay=None, exploration=None, actor_updater=None,
     critic_updater=None
 ):
     """Fill in this class's defaults, then delegate to the parent constructor.

     Args:
         model: Model to use; ``default_model()`` is called when None.
         replay: Replay buffer; ``replays.Buffer(num_steps=5)`` is built
             when None.
         exploration: Forwarded unchanged to the parent constructor, which
             is responsible for handling None.
         actor_updater: Actor updater; defaults to
             ``updaters.DistributionalDeterministicPolicyGradient()`` when
             None.
         critic_updater: Critic updater; defaults to
             ``updaters.DistributionalDeterministicQLearning()`` when None.
     """
     # Compare against None explicitly rather than relying on truthiness:
     # a caller-supplied object that happens to be falsy (e.g. one whose
     # __len__ is 0) must not be silently swapped for the default.
     if model is None:
         model = default_model()
     if replay is None:
         replay = replays.Buffer(num_steps=5)
     if actor_updater is None:
         actor_updater = updaters.DistributionalDeterministicPolicyGradient()
     if critic_updater is None:
         critic_updater = updaters.DistributionalDeterministicQLearning()

     super().__init__(
         model, replay, exploration, actor_updater, critic_updater)