Esempio n. 1
0
 def obtain_samples(self, itr):
     """Sample paths with the algorithm's current policy parameters.

     The policy's parameter values are read via ``get_param_values()`` and
     handed to ``parallel_sampler.sample_paths`` together with the
     algorithm's ``batch_size`` (as ``max_samples``), ``max_path_length``
     and ``scope``.

     Args:
         itr: Iteration index. Accepted for interface compatibility; this
             method does not read it.

     Returns:
         The sampled paths as returned by ``sample_paths`` when
         ``self.algo.whole_paths`` is truthy; otherwise the result of
         ``parallel_sampler.truncate_paths(paths, batch_size)``.
     """
     algo = self.algo
     policy_values = algo.policy.get_param_values()
     result = parallel_sampler.sample_paths(
         policy_params=policy_values,
         max_samples=algo.batch_size,
         max_path_length=algo.max_path_length,
         scope=algo.scope,
     )
     if not algo.whole_paths:
         # Presumably trims the collection down to batch_size samples;
         # exact semantics live in parallel_sampler -- confirm there.
         result = parallel_sampler.truncate_paths(result, algo.batch_size)
     return result
Esempio n. 2
0
 def obtain_is_samples(self, itr):
     """Build a batch of paths from the entries of the sampler's history.

     For each ``(policy distribution, paths)`` pair yielded by
     ``self.get_history_list(self.n_backtrack)``, the stored paths are
     passed through ``self.sample_isweighted_paths`` against the current
     policy, and the results are pooled into one list. If the pool holds
     more entries than ``self.algo.batch_size``, ``random.sample`` picks
     ``batch_size`` of them.

     Args:
         itr: Iteration index. Accepted for interface compatibility; this
             method does not read it.

     Returns:
         The pooled (possibly subsampled) list of paths when
         ``self.algo.whole_paths`` is truthy; otherwise the result of
         ``parallel_sampler.truncate_paths(paths, batch_size)``.
     """
     pooled = []
     history = self.get_history_list(self.n_backtrack)
     for past_distribution, past_paths in history:
         reweighted = self.sample_isweighted_paths(
             policy=self.algo.policy,
             hist_policy_distribution=past_distribution,
             max_samples=self.algo.batch_size,
             max_path_length=self.algo.max_path_length,
             paths=past_paths,
             hist_variance_penalty=self.hist_variance_penalty,
             max_is_ratio=self.max_is_ratio,
             ess_threshold=self.ess_threshold,
         )
         pooled.extend(reweighted)

     # NOTE(review): this caps the *number of paths* at batch_size, the same
     # value later handed to truncate_paths -- confirm the units are intended.
     if len(pooled) > self.algo.batch_size:
         pooled = random.sample(pooled, self.algo.batch_size)

     if self.algo.whole_paths:
         return pooled
     return parallel_sampler.truncate_paths(pooled, self.algo.batch_size)