def train(self):
    """Run a training pass by delegating directly to ``Agent.train``.

    :return: whatever ``Agent.train`` returns for this agent.
    """
    outcome = Agent.train(self)
    return outcome
def train(self):
    """Train through ``Agent.train`` rather than the policy optimization path.

    The algorithm works with an experience replay buffer (non-episodic), so
    the policy optimization train method cannot be used; ``Agent.train`` is
    needed instead. Since ``Agent.train`` has no apply_gradients step, that
    has to be done in learn from batch.

    :return: whatever ``Agent.train`` returns for this agent.
    """
    outcome = Agent.train(self)
    return outcome
def train(self):
    """Set the consecutive training step count, then run ``Agent.train``.

    ``num_consecutive_training_steps`` is set to the current episode step
    counter multiplied by ``td3_training_ratio``, truncated to an ``int``.

    :return: whatever ``Agent.train`` returns for this agent.
    """
    episode_steps = self.current_episode_steps_counter
    training_ratio = self.ap.algorithm.td3_training_ratio
    # int() truncates the scaled count before it is stored.
    self.ap.algorithm.num_consecutive_training_steps = int(episode_steps * training_ratio)
    return Agent.train(self)
def train(self):
    """Set the consecutive training step count, then run ``Agent.train``.

    ``num_consecutive_training_steps`` is set to the current episode step
    counter before delegating.

    :return: whatever ``Agent.train`` returns for this agent.
    """
    episode_steps = self.current_episode_steps_counter
    self.ap.algorithm.num_consecutive_training_steps = episode_steps
    return Agent.train(self)