def compute_steps(self, config, obs_filter, rew_filter):
    """Compute multiple rollouts and concatenate the results.

    Args:
        config: Configuration parameters.
        obs_filter: Function that is applied to each of the observations.
        rew_filter: Function that is applied to each of the rewards.

    Returns:
        trajectories: Concatenated trajectory data.
        total_rewards: Total rewards of the trajectories.
        trajectory_lengths: Lengths of the trajectories.
        updated_obs_filter: Observation filter, flushed from the sampler.
        reward_filter: The reward filter after processing.
    """
    num_steps_so_far = 0
    trajectories = []
    self.update_filters(obs_filter, rew_filter)

    # Keep collecting rollouts until the minimum step budget is met.
    while num_steps_so_far < config["min_steps_per_task"]:
        rollout = self.sampler.get_data()
        trajectory = process_rollout(
            rollout, self.reward_filter, config["gamma"],
            config["lambda"], use_gae=config["use_gae"])
        num_steps_so_far += trajectory["rewards"].shape[0]
        trajectories.append(trajectory)

    metrics = self.sampler.get_metrics()
    total_rewards, trajectory_lengths = zip(*[
        (c.episode_reward, c.episode_length) for c in metrics])
    updated_obs_filter = self.sampler.get_obs_filter(flush=True)
    return (
        concatenate(trajectories), total_rewards, trajectory_lengths,
        updated_obs_filter, self.reward_filter)
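# The call sites above and below all assume a process_rollout helper that
# turns raw sampler output into training-ready columns. Its real
# implementation is not shown here; the following is a minimal sketch of
# the assumed interface. The field names `rollout.data["rewards"]`,
# `rollout.data["vf_preds"]`, and the `rollout.last_r` bootstrap value are
# assumptions for illustration, not confirmed API.
import numpy as np
import scipy.signal


def discount(x, gamma):
    # Right-to-left discounted cumulative sum:
    # y[t] = x[t] + gamma * y[t + 1].
    return scipy.signal.lfilter([1], [1, -gamma], x[::-1])[::-1]


def process_rollout(rollout, reward_filter, gamma, lambda_=1.0,
                    use_gae=True):
    rewards = np.array(
        [reward_filter(r) for r in rollout.data["rewards"]])
    if use_gae:
        # Generalized Advantage Estimation (Schulman et al., 2016):
        # delta_t = r_t + gamma * V(s_{t+1}) - V(s_t),
        # A_t = sum_k (gamma * lambda)^k * delta_{t+k}.
        vpred = np.append(rollout.data["vf_preds"], rollout.last_r)
        deltas = rewards + gamma * vpred[1:] - vpred[:-1]
        advantages = discount(deltas, gamma * lambda_)
    else:
        # Plain discounted returns as the advantage signal.
        advantages = discount(
            np.append(rewards, rollout.last_r), gamma)[:-1]
    return {"rewards": rewards, "advantages": advantages}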
def sample(self):
    rollout = self.sampler.get_data()
    samples = process_rollout(
        rollout, NoFilter(), gamma=self.config["gamma"], use_gae=False)
    return samples
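# NoFilter is used above (and in two methods below) as an identity
# pass-through for rewards. A minimal sketch of the assumed interface; the
# `update` keyword is an assumption, mirroring filters that keep running
# statistics:
class NoFilter:
    def __call__(self, x, update=True):
        # Identity: return the value unchanged.
        return x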
def sample(self):
    rollout = self.sampler.get_data()
    samples = process_rollout(
        rollout, self.rew_filter, gamma=self.config["gamma"],
        lambda_=self.config["lambda"], use_gae=True)
    return samples
def compute_gradient(self):
    rollout = self.sampler.get_data()
    obs_filter = self.sampler.get_obs_filter(flush=True)
    # Note: gamma and lambda_ are hard-coded here rather than read from
    # the config.
    traj = process_rollout(
        rollout, self.rew_filter, gamma=0.99, lambda_=1.0, use_gae=True)
    gradient, info = self.policy.compute_gradients(traj)
    info["obs_filter"] = obs_filter
    info["rew_filter"] = self.rew_filter
    return gradient, info
def sample(self): """ Returns: trajectory (PartialRollout): Experience Samples from evaluator""" rollout = self.sampler.get_data() samples = process_rollout(rollout, self.rew_filter, gamma=self.config["gamma"], lambda_=self.config["lambda"], use_gae=True) return samples
def sample(self): """Returns a batch of samples.""" rollout = self.sampler.get_data() rollout.data["weights"] = np.ones_like(rollout.data["rewards"]) # since each sample is one step, no discounting needs to be applied; # this does not involve config["gamma"] samples = process_rollout( rollout, NoFilter(), gamma=1.0, use_gae=False) return samples
def sample(self): """Returns experience samples from this Evaluator. Observation filter and reward filters are flushed here. Returns: SampleBatch: A columnar batch of experiences. """ num_steps_so_far = 0 all_samples = [] while num_steps_so_far < self.config["min_steps_per_task"]: rollout = self.sampler.get_data() samples = process_rollout( rollout, self.rew_filter, self.config["gamma"], self.config["lambda"], use_gae=self.config["use_gae"]) num_steps_so_far += samples.count all_samples.append(samples) return SampleBatch.concat_samples(all_samples)
def sample(self): """Returns experience samples from this Evaluator. Observation filter and reward filters are flushed here. Returns: SampleBatch: A columnar batch of experiences. """ num_steps_so_far = 0 all_samples = [] while num_steps_so_far < self.config["min_steps_per_task"]: rollout = self.sampler.get_data() samples = process_rollout( rollout, self.rew_filter, self.config["gamma"], self.config["lambda"], use_gae=self.config["use_gae"]) num_steps_so_far += samples.count all_samples.append(samples) return SampleBatch.concat_samples(all_samples)
def sample(self):
    rollout = self.sampler.get_data()
    samples = process_rollout(
        rollout, self.rew_filter, gamma=self.config["gamma"],
        lambda_=self.config["lambda"], use_gae=True)
    return samples
def sample(self):
    rollout = self.sampler.get_data()
    samples = process_rollout(
        rollout, NoFilter(), gamma=self.config["gamma"], use_gae=False)
    return samples
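# A hedged usage sketch of how one of the sample() methods above might be
# driven from a training loop. `Evaluator` and `make_env` are hypothetical
# names for illustration; only the config keys used by the methods above
# are assumed.
config = {
    "gamma": 0.99,
    "lambda": 1.0,
    "use_gae": True,
    "min_steps_per_task": 200,
}
evaluator = Evaluator(make_env(), config)  # hypothetical constructor
batch = evaluator.sample()
print(batch.count)  # number of environment steps collected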