Example #1
def __init__(self, rl2_max_path_length, meta_batch_size, task_sampler,
             **inner_algo_args):
    # Compose the inner policy-optimization algorithm once, then mirror
    # the attributes the outer meta-training loop reads from it.
    self._inner_algo = RL2NPO(**inner_algo_args)
    self._rl2_max_path_length = rl2_max_path_length
    self._env_spec = self._inner_algo.env_spec
    self._flatten_input = self._inner_algo.flatten_input
    self._policy = self._inner_algo.policy
    self._discount = self._inner_algo.discount
    self._meta_batch_size = meta_batch_size
    self._task_sampler = task_sampler
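The constructor above is pure composition: it builds the inner RL2NPO once and mirrors the attributes the outer meta-training loop reads. Below is a self-contained sketch of that delegation pattern; the class names are illustrative stand-ins, not the garage API, and the real RL2NPO constructor takes many more arguments.

class InnerAlgo:
    # Stand-in for RL2NPO: holds only the attributes mirrored above.
    def __init__(self, policy=None, discount=0.99):
        self.policy = policy
        self.discount = discount


class MetaWrapper:
    # Stand-in for the RL2 wrapper shown above.
    def __init__(self, meta_batch_size, task_sampler, **inner_algo_args):
        # Build the inner algorithm once, then mirror the attributes the
        # outer loop needs, so callers never reach into the inner object.
        self._inner_algo = InnerAlgo(**inner_algo_args)
        self._policy = self._inner_algo.policy
        self._discount = self._inner_algo.discount
        self._meta_batch_size = meta_batch_size
        self._task_sampler = task_sampler


algo = MetaWrapper(meta_batch_size=10, task_sampler=None, discount=0.95)
assert algo._discount == 0.95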
Example #2
def __init__(self, rl2_max_path_length, meta_batch_size, task_sampler,
             meta_evaluator, n_epochs_per_eval, **inner_algo_args):
    self._inner_algo = RL2NPO(**inner_algo_args)
    self._rl2_max_path_length = rl2_max_path_length
    # Note: unlike Example #1, this variant reaches into the inner
    # algorithm's private attributes (_env_spec, _discount).
    self.env_spec = self._inner_algo._env_spec
    self._n_epochs_per_eval = n_epochs_per_eval
    self._policy = self._inner_algo.policy
    self._discount = self._inner_algo._discount
    self._meta_batch_size = meta_batch_size
    self._task_sampler = task_sampler
    # A meta-evaluator run every n_epochs_per_eval epochs.
    self._meta_evaluator = meta_evaluator
Example #3
def __init__(self, env_spec, episodes_per_trial, meta_batch_size,
             task_sampler, meta_evaluator, n_epochs_per_eval,
             **inner_algo_args):
    self._env_spec = env_spec
    # One RL2 trial spans several episodes, so the inner algorithm's
    # spec gets a proportionally longer maximum episode length.
    _inner_env_spec = EnvSpec(
        env_spec.observation_space, env_spec.action_space,
        episodes_per_trial * env_spec.max_episode_length)
    self._inner_algo = RL2NPO(env_spec=_inner_env_spec, **inner_algo_args)
    self._rl2_max_episode_length = self._env_spec.max_episode_length
    self._n_epochs_per_eval = n_epochs_per_eval
    self._policy = self._inner_algo.policy
    self._discount = self._inner_algo._discount
    self._meta_batch_size = meta_batch_size
    self._task_sampler = task_sampler
    self._meta_evaluator = meta_evaluator
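Example #3 differs from the first two by deriving a trial-level EnvSpec: RL2 treats a sequence of episodes as one continuous trial, so the inner algorithm's horizon is the per-episode horizon multiplied by episodes_per_trial. Below is a self-contained sketch of just that computation; EnvSpec here is a stand-in dataclass with only the fields used above, not the garage class.

from dataclasses import dataclass


@dataclass
class EnvSpec:
    # Stand-in for garage's EnvSpec; only the fields the snippet reads.
    observation_space: object
    action_space: object
    max_episode_length: int


def trial_spec(env_spec, episodes_per_trial):
    # The inner algorithm sees a whole trial as one long episode, so its
    # maximum "episode" length covers episodes_per_trial concatenated
    # episodes of the base environment.
    return EnvSpec(env_spec.observation_space, env_spec.action_space,
                   episodes_per_trial * env_spec.max_episode_length)


spec = EnvSpec(observation_space=None, action_space=None,
               max_episode_length=100)
assert trial_spec(spec, episodes_per_trial=2).max_episode_length == 200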