def experience_cmdp(self, new_trajectory, other_info=None):
    """Store one trajectory in the currently active replay buffer.

    The target is ``self.prioritized_replay_buffer`` when
    ``self.use_prioritized_experience_replay`` is truthy, otherwise
    ``self.replay_buffer``. The actual insertion is delegated to
    ``add_episode`` with ``gamma=self.gamma``.

    Args:
        new_trajectory: Trajectory forwarded unchanged to ``add_episode``.
        other_info: Not used by this method; accepted so the signature
            lines up with ``experience``.
    """
    # Only the selected buffer attribute is read, same as the if/else form.
    target_buffer = (
        self.prioritized_replay_buffer
        if self.use_prioritized_experience_replay
        else self.replay_buffer
    )
    add_episode(target_buffer, new_trajectory, gamma=self.gamma)
def experience(self, new_trajectory, other_info=None):
    """Store a trajectory together with its GMV and cost counterparts.

    The main trajectory goes to ``self.prioritized_replay_buffer`` when
    ``self.use_prioritized_experience_replay`` is truthy, otherwise to
    ``self.replay_buffer``. The trajectories found under
    ``other_info["gmv"]`` and ``other_info["cost"]`` are then added to
    ``self.gmv_replay_buffer`` and ``self.cost_replay_buffer``. Every
    insertion is delegated to ``add_episode`` with ``gamma=self.gamma``.

    Args:
        new_trajectory: Trajectory forwarded unchanged to ``add_episode``.
        other_info: Mapping that must provide the keys ``"gmv"`` and
            ``"cost"``. Although the signature defaults it to ``None``,
            it is required in practice.

    Raises:
        TypeError: If ``other_info`` is ``None``.
        KeyError: If ``"gmv"`` or ``"cost"`` is missing from a dict
            ``other_info``.
    """
    # The default of None used to fail with an opaque "'NoneType' object is
    # not subscriptable"; keep the exception type but say what is missing.
    if other_info is None:
        raise TypeError(
            "experience() requires other_info with 'gmv' and 'cost' "
            "trajectories, got None"
        )

    # Look both entries up before touching any buffer, so a missing key
    # leaves every buffer unmodified (same ordering as the original).
    gmv_trajectory = other_info["gmv"]
    cost_trajectory = other_info["cost"]

    main_buffer = (
        self.prioritized_replay_buffer
        if self.use_prioritized_experience_replay
        else self.replay_buffer
    )
    add_episode(main_buffer, new_trajectory, gamma=self.gamma)

    # NOTE(review): the source line was collapsed, so it is assumed that the
    # GMV/cost insertions run regardless of the prioritized-replay flag —
    # confirm against the original layout.
    add_episode(self.gmv_replay_buffer, gmv_trajectory, gamma=self.gamma)
    add_episode(self.cost_replay_buffer, cost_trajectory, gamma=self.gamma)