def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
    # Assumes RLlib's advantage helper is in scope, e.g.:
    # from ray.rllib.evaluation.postprocessing import compute_advantages
    # Bootstrap with 0 if the episode terminated, otherwise with the value
    # estimate for the final observation.
    completed = sample_batch["dones"][-1]
    if completed:
        last_r = 0.0
    else:
        last_r = self._value(sample_batch["new_obs"][-1])
    return compute_advantages(
        sample_batch, last_r, self.config["gamma"], self.config["lambda"])
def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
    # No bootstrapping: treat the trajectory as ending with zero value and
    # let the config decide whether GAE is used.
    last_r = 0.0
    batch = compute_advantages(
        sample_batch, last_r, self.config["gamma"], self.config["lambda"],
        use_gae=self.config["use_gae"])
    return batch
def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
    completed = sample_batch["dones"][-1]
    if completed:
        last_r = 0.0
    else:
        # Recurrent policy: pass the last RNN state outputs along with the
        # final observation when bootstrapping the value estimate.
        next_state = []
        for i in range(len(self.state_in)):
            next_state.append([sample_batch["state_out_{}".format(i)][-1]])
        last_r = self.value(sample_batch["new_obs"][-1], *next_state)
    return compute_advantages(
        sample_batch, last_r, self.config["gamma"], self.config["lambda"])
def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
    # Plain discounted returns (GAE disabled) with a zero bootstrap value.
    return compute_advantages(
        sample_batch, 0.0, self.config["gamma"], use_gae=False)
def postprocess_trajectory(self, batch, other_agent_batches=None):
    # Hardcoded bootstrap value (100.0) and discount factor (0.9); GAE disabled.
    return compute_advantages(batch, 100.0, 0.9, use_gae=False)