Example #1
    def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
        completed = sample_batch["dones"][-1]
        if completed:
            # Terminal state: there are no future rewards to bootstrap.
            last_r = 0.0
        else:
            # Truncated rollout: bootstrap from the value estimate of the
            # last observation.
            last_r = self.value(sample_batch["new_obs"][-1])
        return compute_advantages(
            sample_batch, last_r, self.config["gamma"], self.config["lambda"])
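For reference, here is a minimal sketch of the generalized advantage estimation (GAE) that compute_advantages performs when use_gae=True. This is an illustrative reimplementation, not RLlib's actual code; the inputs (per-step rewards and value predictions) and the last_r bootstrap mirror the example above.

    import numpy as np

    def gae_advantages(rewards, vf_preds, last_r, gamma, lam):
        # GAE: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t),
        # adv_t = sum_k (gamma * lam)^k * delta_{t+k}
        values = np.append(vf_preds, last_r)  # V(s_0)..V(s_T), bootstrapped
        deltas = rewards + gamma * values[1:] - values[:-1]
        advantages = np.zeros_like(rewards, dtype=np.float64)
        running = 0.0
        for t in reversed(range(len(rewards))):
            running = deltas[t] + gamma * lam * running
            advantages[t] = running
        return advantages

    # A 3-step truncated rollout, bootstrapped with last_r = 0.5:
    print(gae_advantages(np.array([1.0, 0.0, 1.0]),
                         np.array([0.4, 0.3, 0.2]), 0.5, 0.99, 0.95))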
Example #2
    def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
        completed = sample_batch["dones"][-1]
        if completed:
            last_r = 0.0
        else:
            # Recurrent policy: feed the final RNN state outputs along with
            # the last observation when bootstrapping the value.
            next_state = []
            for i in range(len(self.state_in)):
                next_state.append([sample_batch["state_out_{}".format(i)][-1]])
            last_r = self.value(sample_batch["new_obs"][-1], *next_state)
        return compute_advantages(sample_batch, last_r, self.config["gamma"],
                                  self.config["lambda"])
Example #3
    def sample(self):
        """Returns a batch of samples."""

        rollout = self.sampler.get_data()
        rollout.data["weights"] = np.ones_like(rollout.data["rewards"])

        # Since each sample is a single step, no discounting is needed:
        # gamma is fixed at 1.0 here and config["gamma"] is not used.
        samples = compute_advantages(rollout, 0.0, gamma=1.0, use_gae=False)

        return samples
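A hedged illustration of why gamma can be pinned to 1.0 above: with use_gae=False, the advantage column is simply the discounted return from each step, and for one-step samples with last_r = 0.0 that return is the raw reward regardless of the discount. The helper below is illustrative, not RLlib code.

    import numpy as np

    def discounted_returns(rewards, last_r, gamma):
        # use_gae=False path: return_t = r_t + gamma * return_{t+1},
        # seeded with the bootstrap value last_r
        out = np.zeros_like(rewards, dtype=np.float64)
        running = last_r
        for t in reversed(range(len(rewards))):
            running = rewards[t] + gamma * running
            out[t] = running
        return out

    # One-step sample: the return equals the reward for any gamma.
    print(discounted_returns(np.array([2.5]), 0.0, 1.0))  # -> [2.5]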
Example #4
    def sample(self):
        """Returns experience samples from this Evaluator. Observation
        filter and reward filters are flushed here.

        Returns:
            SampleBatch: A columnar batch of experiences.
        """
        num_steps_so_far = 0
        all_samples = []

        while num_steps_so_far < self.config["min_steps_per_task"]:
            rollout = self.sampler.get_data()
            last_r = 0.0  # no bootstrap needed: rollouts here are complete episodes
            samples = compute_advantages(
                rollout, last_r, self.config["gamma"],
                self.config["lambda"], use_gae=self.config["use_gae"])
            num_steps_so_far += samples.count
            all_samples.append(samples)
        return SampleBatch.concat_samples(all_samples)
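The final concatenation merges the per-rollout batches column-wise. A small usage sketch of SampleBatch.concat_samples follows; note that the import path varies across Ray versions (older releases expose SampleBatch under ray.rllib.evaluation instead).

    import numpy as np
    from ray.rllib.policy.sample_batch import SampleBatch  # recent Ray versions

    a = SampleBatch({"rewards": np.array([1.0, 0.0])})
    b = SampleBatch({"rewards": np.array([2.0])})
    merged = SampleBatch.concat_samples([a, b])
    print(merged["rewards"], merged.count)  # -> [1. 0. 2.] 3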
Example #5
    def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
        # Plain discounted returns (no GAE) with no value bootstrap.
        return compute_advantages(
            sample_batch, 0.0, self.config["gamma"], use_gae=False)
Example #6
    def postprocess_trajectory(self, batch):
        # Hard-coded bootstrap value (last_r=100.0) and discount (gamma=0.9).
        return compute_advantages(batch, 100.0, 0.9, use_gae=False)
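To make the hard-coded arguments concrete: with use_gae=False, each advantage is a 0.9-discounted return bootstrapped with 100.0. Plugging a hypothetical two-step batch with rewards [1.0, 2.0] into the discounted-return recurrence sketched after Example #3 gives advantage_1 = 2.0 + 0.9 * 100.0 = 92.0, then advantage_0 = 1.0 + 0.9 * 92.0 = 83.8.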