Esempio n. 1
0
 def train(self):
     """Run training by delegating directly to ``Agent.train``.

     Returns:
         Whatever ``Agent.train(self)`` returns.
     """
     outcome = Agent.train(self)
     return outcome
Esempio n. 2
0
 def train(self):
     """Train through ``Agent.train`` rather than the policy optimization train method.

     The algorithm works with a (non-episodic) experience replay buffer, so
     the policy optimization train method cannot be used and ``Agent.train``
     is needed instead. Note that ``Agent.train`` has no apply_gradients
     step, so it has to be done in learn from batch.

     Returns:
         Whatever ``Agent.train(self)`` returns.
     """
     training_result = Agent.train(self)
     return training_result
Esempio n. 3
0
 def train(self):
     """Set the consecutive training step count, then delegate to ``Agent.train``.

     ``num_consecutive_training_steps`` is set to the current episode step
     counter multiplied by ``td3_training_ratio``, truncated with ``int``.

     Returns:
         Whatever ``Agent.train(self)`` returns.
     """
     # Read the operands in the same order as a single-expression product would.
     episode_steps = self.current_episode_steps_counter
     training_ratio = self.ap.algorithm.td3_training_ratio
     self.ap.algorithm.num_consecutive_training_steps = int(episode_steps * training_ratio)
     return Agent.train(self)
 def train(self):
     """Set the consecutive training step count, then delegate to ``Agent.train``.

     ``num_consecutive_training_steps`` is set to the current value of
     ``current_episode_steps_counter`` before training runs.

     Returns:
         Whatever ``Agent.train(self)`` returns.
     """
     episode_steps = self.current_episode_steps_counter
     self.ap.algorithm.num_consecutive_training_steps = episode_steps
     return Agent.train(self)