Esempio n. 1
0
    def __init__(self,
                 policy,
                 env,
                 n_envs,
                 replay_pool_size,
                 max_path_length,
                 sampling_method,
                 save_rollouts=False,
                 save_rollouts_observations=True,
                 save_env_infos=False,
                 env_str=None,
                 replay_pool_params=None):
        """Create the replay pools and the vectorized environment executor.

        Args:
            policy: Policy object. Its ``N``, ``gamma`` and
                ``obs_history_len`` attributes are forwarded to each pool.
            env: Environment. Its ``spec`` and ``horizon`` are forwarded to
                each pool, and it is the single environment that is stepped.
            n_envs: Number of environments. Must currently be 1 (asserted).
            replay_pool_size: Total pool capacity; each pool receives
                ``replay_pool_size // n_envs``.
            max_path_length: Forwarded to ``VecEnvExecutor``.
            sampling_method: Forwarded to each ``RNNCriticReplayPool``.
            save_rollouts: Forwarded to each ``RNNCriticReplayPool``.
            save_rollouts_observations: Forwarded to each
                ``RNNCriticReplayPool``.
            save_env_infos: Forwarded to each ``RNNCriticReplayPool``.
            env_str: Passed to ``create_env`` to rebuild environments when
                copying ``env`` through pickle fails (only reachable when
                ``n_envs > 1``).
            replay_pool_params: Extra parameters forwarded to each
                ``RNNCriticReplayPool``. ``None`` (the default) means an
                empty dict.
        """
        # Use a fresh dict per call: a ``{}`` literal as the default would be
        # a single object shared by every instance built without this
        # argument, so a mutation in one place would leak into the others.
        if replay_pool_params is None:
            replay_pool_params = {}

        self._policy = policy
        self._n_envs = n_envs

        assert (self._n_envs == 1)  # b/c policy reset

        self._replay_pools = [
            RNNCriticReplayPool(
                env.spec,
                env.horizon,
                policy.N,
                policy.gamma,
                replay_pool_size // n_envs,
                obs_history_len=policy.obs_history_len,
                sampling_method=sampling_method,
                save_rollouts=save_rollouts,
                save_rollouts_observations=save_rollouts_observations,
                save_env_infos=save_env_infos,
                replay_pool_params=replay_pool_params) for _ in range(n_envs)
        ]

        if self._n_envs > 1:
            try:
                # Independent copies of the environment via a pickle
                # round-trip of the object that was passed in.
                envs = [
                    pickle.loads(pickle.dumps(env))
                    for _ in range(self._n_envs)
                ]
            except Exception:
                # The environment could not be copied through pickle; build
                # new ones from the description string instead.
                # ``Exception`` (not a bare ``except``) so that
                # KeyboardInterrupt / SystemExit still propagate.
                envs = [create_env(env_str) for _ in range(self._n_envs)]
        else:
            envs = [env]

        ### need to seed each environment if it is GymEnv
        seed = get_seed()
        if seed is not None and isinstance(utils.inner_env(env), GymEnv):
            # Offset the seed by the index so that each environment gets a
            # different seed. The loop variable is deliberately not named
            # ``env`` to avoid rebinding the constructor argument.
            for i, sub_env in enumerate(envs):
                utils.inner_env(sub_env).env.seed(seed + i)
        self._vec_env = VecEnvExecutor(envs=envs,
                                       max_path_length=max_path_length)
        self._curr_observations = self._vec_env.reset()
Esempio n. 2
0
 def statistics(self):
     """Return the result of ``RNNCriticReplayPool.statistics_pools`` for this object's replay pools."""
     pooled_statistics = RNNCriticReplayPool.statistics_pools
     return pooled_statistics(self._replay_pools)
Esempio n. 3
0
 def get_recent_paths(self):
     """Return the result of ``RNNCriticReplayPool.get_recent_paths_pools`` for this object's replay pools."""
     collect_recent = RNNCriticReplayPool.get_recent_paths_pools
     return collect_recent(self._replay_pools)
Esempio n. 4
0
 def log(self, prefix=''):
     """Delegate logging to ``RNNCriticReplayPool.log_pools``.

     The replay pools and ``self._nb_target_lost`` are passed positionally;
     ``prefix`` is forwarded as a keyword argument. Nothing is returned.
     """
     log_all = RNNCriticReplayPool.log_pools
     log_all(self._replay_pools, self._nb_target_lost, prefix=prefix)
Esempio n. 5
0
 def sample(self, batch_size):
     """Return the result of ``RNNCriticReplayPool.sample_pools`` for ``batch_size``.

     The policy's ``only_completed_episodes`` attribute is forwarded as the
     keyword argument of the same name.
     """
     draw = RNNCriticReplayPool.sample_pools
     pools = self._replay_pools
     completed_only = self._policy.only_completed_episodes
     return draw(pools, batch_size, only_completed_episodes=completed_only)
Esempio n. 6
0
 def log(self, prefix=''):
     """Delegate logging of this object's replay pools to ``RNNCriticReplayPool.log_pools``.

     ``prefix`` is forwarded as a keyword argument. Nothing is returned.
     """
     log_all = RNNCriticReplayPool.log_pools
     log_all(self._replay_pools, prefix=prefix)