Example #1
    def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
        completed = sample_batch["dones"][-1]
        if completed:
            # Terminal state: there are no future rewards to bootstrap.
            last_r = 0.0
        else:
            # Truncated rollout: bootstrap from the value estimate of the
            # last observation.
            last_r = self.value(sample_batch["new_obs"][-1])
        return compute_advantages(
            sample_batch, last_r, self.config["gamma"], self.config["lambda"])
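For reference, here is a minimal sketch of the generalized advantage estimation (GAE) that compute_advantages performs when use_gae=True. This is an illustrative reimplementation, not RLlib's actual code; the inputs (per-step rewards and value predictions) and the last_r bootstrap mirror the example above.

    import numpy as np

    def gae_advantages(rewards, vf_preds, last_r, gamma, lam):
        # GAE: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t),
        # adv_t = sum_k (gamma * lam)^k * delta_{t+k}
        values = np.append(vf_preds, last_r)  # V(s_0)..V(s_T), bootstrapped
        deltas = rewards + gamma * values[1:] - values[:-1]
        advantages = np.zeros_like(rewards, dtype=np.float64)
        running = 0.0
        for t in reversed(range(len(rewards))):
            running = deltas[t] + gamma * lam * running
            advantages[t] = running
        return advantages

    # A 3-step truncated rollout, bootstrapped with last_r = 0.5:
    print(gae_advantages(np.array([1.0, 0.0, 1.0]),
                         np.array([0.4, 0.3, 0.2]), 0.5, 0.99, 0.95))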
Example #2
    def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
        completed = sample_batch["dones"][-1]
        if completed:
            last_r = 0.0
        else:
            # Recurrent policy: feed the final RNN state outputs along with
            # the last observation when bootstrapping the value.
            next_state = []
            for i in range(len(self.state_in)):
                next_state.append([sample_batch["state_out_{}".format(i)][-1]])
            last_r = self.value(sample_batch["new_obs"][-1], *next_state)
        return compute_advantages(sample_batch, last_r, self.config["gamma"],
                                  self.config["lambda"])
Example #3
    def sample(self):
        """Returns a batch of samples."""

        rollout = self.sampler.get_data()
        rollout.data["weights"] = np.ones_like(rollout.data["rewards"])

        # Since each sample is a single step, no discounting is needed:
        # gamma is fixed at 1.0 here and config["gamma"] is not used.
        samples = compute_advantages(rollout, 0.0, gamma=1.0, use_gae=False)

        return samples
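A hedged illustration of why gamma can be pinned to 1.0 above: with use_gae=False, the advantage column is simply the discounted return from each step, and for one-step samples with last_r = 0.0 that return is the raw reward regardless of the discount. The helper below is illustrative, not RLlib code.

    import numpy as np

    def discounted_returns(rewards, last_r, gamma):
        # use_gae=False path: return_t = r_t + gamma * return_{t+1},
        # seeded with the bootstrap value last_r
        out = np.zeros_like(rewards, dtype=np.float64)
        running = last_r
        for t in reversed(range(len(rewards))):
            running = rewards[t] + gamma * running
            out[t] = running
        return out

    # One-step sample: the return equals the reward for any gamma.
    print(discounted_returns(np.array([2.5]), 0.0, 1.0))  # -> [2.5]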
Example #4
    def sample(self):
        """Returns experience samples from this Evaluator. Observation
        filter and reward filters are flushed here.

        Returns:
            SampleBatch: A columnar batch of experiences.
        """
        num_steps_so_far = 0
        all_samples = []

        while num_steps_so_far < self.config["min_steps_per_task"]:
            rollout = self.sampler.get_data()
            last_r = 0.0  # no bootstrap needed: rollouts here are complete episodes
            samples = compute_advantages(
                rollout, last_r, self.config["gamma"],
                self.config["lambda"], use_gae=self.config["use_gae"])
            num_steps_so_far += samples.count
            all_samples.append(samples)
        return SampleBatch.concat_samples(all_samples)
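The final concatenation merges the per-rollout batches column-wise. A small usage sketch of SampleBatch.concat_samples follows; note that the import path varies across Ray versions (older releases expose SampleBatch under ray.rllib.evaluation instead).

    import numpy as np
    from ray.rllib.policy.sample_batch import SampleBatch  # recent Ray versions

    a = SampleBatch({"rewards": np.array([1.0, 0.0])})
    b = SampleBatch({"rewards": np.array([2.0])})
    merged = SampleBatch.concat_samples([a, b])
    print(merged["rewards"], merged.count)  # -> [1. 0. 2.] 3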
Example #5
    def postprocess_trajectory(self, sample_batch, other_agent_batches=None):
        # Plain discounted returns (no GAE) with no value bootstrap.
        return compute_advantages(
            sample_batch, 0.0, self.config["gamma"], use_gae=False)
Example #6
    def postprocess_trajectory(self, batch):
        # Hard-coded bootstrap value (last_r=100.0) and discount (gamma=0.9).
        return compute_advantages(batch, 100.0, 0.9, use_gae=False)
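To make the hard-coded arguments concrete: with use_gae=False, each advantage is a 0.9-discounted return bootstrapped with 100.0. Plugging a hypothetical two-step batch with rewards [1.0, 2.0] into the discounted-return recurrence sketched after Example #3 gives advantage_1 = 2.0 + 0.9 * 100.0 = 92.0, then advantage_0 = 1.0 + 0.9 * 92.0 = 83.8.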