Example #1
    def compute_steps(self, config, obs_filter, rew_filter):
        """Compute multiple rollouts and concatenate the results.

        Args:
            config: Configuration parameters
            obs_filter: Function that is applied to each of the
                observations.
            rew_filter: Function that is applied to each of the rewards.

        Returns:
            trajectories: Concatenated trajectory data from the rollouts.
            total_rewards: Total rewards of the trajectories.
            trajectory_lengths: Lengths of the trajectories.
            updated_obs_filter: Observation filter after the rollouts.
            reward_filter: Reward filter after the rollouts.
        """
        num_steps_so_far = 0
        trajectories = []
        self.update_filters(obs_filter, rew_filter)

        while num_steps_so_far < config["min_steps_per_task"]:
            rollout = self.sampler.get_data()
            trajectory = process_rollout(
                rollout, self.reward_filter, config["gamma"],
                config["lambda"], use_gae=config["use_gae"])
            num_steps_so_far += trajectory["rewards"].shape[0]
            trajectories.append(trajectory)
        metrics = self.sampler.get_metrics()
        total_rewards, trajectory_lengths = zip(*[
            (c.episode_reward, c.episode_length) for c in metrics])
        updated_obs_filter = self.sampler.get_obs_filter(flush=True)
        return (
            concatenate(trajectories),
            total_rewards,
            trajectory_lengths,
            updated_obs_filter,
            self.reward_filter)
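
A driver that gathers results from several such workers might consume the
returned five-tuple roughly as sketched below. This is only an illustrative
assumption: the names evaluators, combine_filters, config, obs_filter, and
rew_filter are hypothetical and not part of the example above.

    # Hypothetical driver loop; every name here is illustrative.
    all_trajectories, all_rewards, all_lengths = [], [], []
    for ev in evaluators:
        (trajectory, total_rewards, trajectory_lengths,
         new_obs_filter, new_rew_filter) = ev.compute_steps(
            config, obs_filter, rew_filter)
        all_trajectories.append(trajectory)
        all_rewards.extend(total_rewards)
        all_lengths.extend(trajectory_lengths)
        # Fold the per-worker filter state back into the shared filters
        # (combine_filters is an assumed helper).
        obs_filter = combine_filters(obs_filter, new_obs_filter)
        rew_filter = combine_filters(rew_filter, new_rew_filter)
    mean_reward = sum(all_rewards) / max(len(all_rewards), 1)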
Example #2
    def sample(self):
        rollout = self.sampler.get_data()
        samples = process_rollout(rollout,
                                  NoFilter(),
                                  gamma=self.config["gamma"],
                                  use_gae=False)
        return samples
Example #3
    def sample(self):
        rollout = self.sampler.get_data()
        samples = process_rollout(rollout,
                                  self.rew_filter,
                                  gamma=self.config["gamma"],
                                  lambda_=self.config["lambda"],
                                  use_gae=True)
        return samples
Example #4
    def compute_gradient(self):
        rollout = self.sampler.get_data()
        obs_filter = self.sampler.get_obs_filter(flush=True)

        traj = process_rollout(
            rollout, self.rew_filter, gamma=0.99, lambda_=1.0, use_gae=True)
        gradient, info = self.policy.compute_gradients(traj)
        info["obs_filter"] = obs_filter
        info["rew_filter"] = self.rew_filter
        return gradient, info
Example #5
    def sample(self):
        """Returns experience samples from this evaluator.

        Returns:
            trajectory (PartialRollout): Experience samples from the evaluator.
        """
        rollout = self.sampler.get_data()
        samples = process_rollout(rollout,
                                  self.rew_filter,
                                  gamma=self.config["gamma"],
                                  lambda_=self.config["lambda"],
                                  use_gae=True)
        return samples
Example #6
    def sample(self):
        """Returns a batch of samples."""

        rollout = self.sampler.get_data()
        rollout.data["weights"] = np.ones_like(rollout.data["rewards"])

        # Since each sample is a single step, no discounting is needed,
        # so gamma is fixed at 1.0 rather than taken from config["gamma"].
        samples = process_rollout(
            rollout, NoFilter(),
            gamma=1.0, use_gae=False)

        return samples
Example #7
    def sample(self):
        """Returns experience samples from this Evaluator. Observation
        filter and reward filters are flushed here.

        Returns:
            SampleBatch: A columnar batch of experiences.
        """
        num_steps_so_far = 0
        all_samples = []

        while num_steps_so_far < self.config["min_steps_per_task"]:
            rollout = self.sampler.get_data()
            samples = process_rollout(
                rollout, self.rew_filter, self.config["gamma"],
                self.config["lambda"], use_gae=self.config["use_gae"])
            num_steps_so_far += samples.count
            all_samples.append(samples)
        return SampleBatch.concat_samples(all_samples)
Example #8
    def sample(self):
        rollout = self.sampler.get_data()
        samples = process_rollout(
            rollout, self.rew_filter, gamma=self.config["gamma"],
            lambda_=self.config["lambda"], use_gae=True)
        return samples
Example #9
    def sample(self):
        rollout = self.sampler.get_data()
        samples = process_rollout(
            rollout, NoFilter(),
            gamma=self.config["gamma"], use_gae=False)
        return samples
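
All of the examples above hand a rollout, a reward filter, gamma, an optional
lambda_, and a use_gae flag to process_rollout. Its actual implementation is
not shown here; the sketch below is only an assumption of its general shape,
computing GAE advantages when use_gae is set and plain discounted returns
otherwise. Field names such as "rewards" and "vf_preds" and the last_r
attribute are illustrative guesses, not confirmed API.

    import numpy as np

    def process_rollout_sketch(rollout, reward_filter, gamma,
                               lambda_=1.0, use_gae=True):
        """Illustrative sketch only, not the actual process_rollout."""
        rewards = np.array([reward_filter(r) for r in rollout.data["rewards"]],
                           dtype=np.float32)
        if use_gae:
            # Generalized Advantage Estimation:
            #   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
            #   A_t     = delta_t + gamma * lambda * A_{t+1}
            vpred = np.append(rollout.data["vf_preds"], rollout.last_r)
            deltas = rewards + gamma * vpred[1:] - vpred[:-1]
            advantages = np.zeros_like(rewards)
            running = 0.0
            for t in reversed(range(len(rewards))):
                running = deltas[t] + gamma * lambda_ * running
                advantages[t] = running
            value_targets = advantages + vpred[:-1]
        else:
            # Discounted return bootstrapped from the value of the last state.
            advantages = np.zeros_like(rewards)
            running = rollout.last_r
            for t in reversed(range(len(rewards))):
                running = rewards[t] + gamma * running
                advantages[t] = running
            value_targets = advantages
        return {"rewards": rewards, "advantages": advantages,
                "value_targets": value_targets}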