コード例 #1
0
 def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
     """Pass the sampled batch to ``compute_advantages`` with a trailing value.

     The trailing value is ``0.0`` when the last ``"dones"`` entry is
     truthy; otherwise it is ``self._value`` applied to the last
     ``"new_obs"`` entry.

     Args:
         sample_batch: Batch indexed by ``"dones"`` and ``"new_obs"``.
         other_agent_batches: Not read by this method.

     Returns:
         Whatever ``compute_advantages`` returns for this batch.
     """
     episode_finished = sample_batch["dones"][-1]
     # NOTE(review): presumably a value estimate used to bootstrap a
     # rollout that was cut off mid-episode -- confirm against
     # compute_advantages.
     trailing_value = (
         0.0 if episode_finished
         else self._value(sample_batch["new_obs"][-1]))
     return compute_advantages(
         sample_batch,
         trailing_value,
         self.config["gamma"],
         self.config["lambda"])
コード例 #2
0
 def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
     """Run ``compute_advantages`` on the batch with a trailing value of 0.0.

     The ``"gamma"``, ``"lambda"`` and ``"use_gae"`` entries of
     ``self.config`` are forwarded to the call.

     Args:
         sample_batch: Batch forwarded unchanged to ``compute_advantages``.
         other_agent_batches: Not read by this method.

     Returns:
         Whatever ``compute_advantages`` returns for this batch.
     """
     # The trailing value is always 0.0 here; the batch's "dones" flags
     # are not consulted by this method.
     return compute_advantages(
         sample_batch,
         0.0,
         self.config["gamma"],
         self.config["lambda"],
         use_gae=self.config["use_gae"])
コード例 #3
0
 def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
     """Pass the sampled batch to ``compute_advantages`` with a trailing value.

     The trailing value is ``0.0`` when the last ``"dones"`` entry is
     truthy. Otherwise ``self.value`` is called with the last
     ``"new_obs"`` entry followed by one single-element list per entry of
     ``self.state_in``, each holding the last ``"state_out_<i>"`` value.

     Args:
         sample_batch: Batch indexed by ``"dones"``, ``"new_obs"`` and
             ``"state_out_<i>"`` keys.
         other_agent_batches: Not read by this method.

     Returns:
         Whatever ``compute_advantages`` returns for this batch.
     """
     if sample_batch["dones"][-1]:
         trailing_value = 0.0
     else:
         # One single-element list per state slot, in slot order.
         # NOTE(review): the extra list wrapper presumably adds a batch
         # dimension of 1 -- confirm against self.value.
         final_states = [
             [sample_batch["state_out_{}".format(slot)][-1]]
             for slot in range(len(self.state_in))
         ]
         trailing_value = self.value(
             sample_batch["new_obs"][-1], *final_states)
     return compute_advantages(
         sample_batch,
         trailing_value,
         self.config["gamma"],
         self.config["lambda"])
コード例 #4
0
 def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
     """Run ``compute_advantages`` with a 0.0 trailing value and GAE off.

     Args:
         sample_batch: Batch forwarded unchanged to ``compute_advantages``.
         other_agent_batches: Not read by this method.

     Returns:
         Whatever ``compute_advantages`` returns for this batch.
     """
     trailing_value = 0.0
     return compute_advantages(
         sample_batch, trailing_value, self.config["gamma"], use_gae=False)
コード例 #5
0
 def postprocess_trajectory(self, batch, other_agent_batches=None):
     """Run ``compute_advantages`` on the batch with fixed literal arguments.

     Args:
         batch: Batch forwarded unchanged to ``compute_advantages``.
         other_agent_batches: Not read by this method.

     Returns:
         Whatever ``compute_advantages`` returns for this batch.
     """
     # NOTE(review): presumably the trailing value and the discount
     # factor, hard-coded rather than read from a config -- confirm
     # against the compute_advantages signature.
     second_arg = 100.0
     third_arg = 0.9
     return compute_advantages(batch, second_arg, third_arg, use_gae=False)