Example #1
0
 def experience_cmdp(self, new_trajectory, other_info=None):
     """Store a finished trajectory in the currently active replay buffer.

     The trajectory is written to ``self.prioritized_replay_buffer`` when
     ``self.use_prioritized_experience_replay`` is truthy, and to
     ``self.replay_buffer`` otherwise. ``self.gamma`` is forwarded to
     ``add_episode`` as the ``gamma`` keyword.

     Args:
         new_trajectory: Episode data passed unchanged to ``add_episode``.
         other_info: Accepted for signature compatibility; not used here.
     """
     target_buffer = (
         self.prioritized_replay_buffer
         if self.use_prioritized_experience_replay
         else self.replay_buffer
     )
     add_episode(target_buffer, new_trajectory, gamma=self.gamma)
Example #2
0
    def experience(self, new_trajectory, other_info=None):
        """Store a trajectory and its GMV / cost companions in replay buffers.

        The main trajectory goes to ``self.prioritized_replay_buffer`` when
        ``self.use_prioritized_experience_replay`` is truthy, otherwise to
        ``self.replay_buffer``. The ``"gmv"`` and ``"cost"`` entries of
        ``other_info`` are then stored in ``self.gmv_replay_buffer`` and
        ``self.cost_replay_buffer``. Every ``add_episode`` call receives
        ``gamma=self.gamma``.

        Args:
            new_trajectory: Episode data passed unchanged to ``add_episode``.
            other_info: Mapping that must provide the keys ``"gmv"`` and
                ``"cost"``. NOTE: despite the ``None`` default, the value is
                subscripted unconditionally, so omitting it raises
                ``TypeError``.

        Raises:
            TypeError: If ``other_info`` is ``None`` (not subscriptable).
            KeyError: If ``other_info`` is a dict missing ``"gmv"`` or
                ``"cost"``.
        """
        # Pull both companion trajectories out first so that a malformed
        # ``other_info`` fails before any buffer has been modified.
        gmv_episode, cost_episode = other_info["gmv"], other_info["cost"]

        main_buffer = (
            self.prioritized_replay_buffer
            if self.use_prioritized_experience_replay
            else self.replay_buffer
        )
        add_episode(main_buffer, new_trajectory, gamma=self.gamma)

        add_episode(self.gmv_replay_buffer, gmv_episode, gamma=self.gamma)
        add_episode(self.cost_replay_buffer, cost_episode, gamma=self.gamma)