Exemple #1
0
 def __init__(
         self,
         env,
         reward_scale=1.,
         obs_mean=None,
         obs_std=None,
 ):
     """Wrap ``env``, optionally normalizing observations and scaling rewards.

     :param env: environment to wrap.
     :param reward_scale: multiplier applied to rewards.
     :param obs_mean: per-dimension observation mean; enables normalization.
     :param obs_std: per-dimension observation std; enables normalization.
     """
     # _wrapped_env must exist before anything else runs:
     # Serializable.quick_init calls getattr on this object, and our
     # __getattr__ delegates to self._wrapped_env — without this
     # assignment that delegation would recurse forever.
     self._wrapped_env = env
     # Keep this env's serialization separate from the wrapped env's;
     # otherwise serialization gets delegated to the wrapped env.
     self._serializable_initialized = False
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     # Normalize only when the caller supplied at least one statistic.
     self._should_normalize = not (obs_mean is None and obs_std is None)
     if self._should_normalize:
         obs_mean = (np.zeros_like(env.observation_space.low)
                     if obs_mean is None else np.array(obs_mean))
         obs_std = (np.ones_like(env.observation_space.low)
                    if obs_std is None else np.array(obs_std))
     self._reward_scale = reward_scale
     self._obs_mean = obs_mean
     self._obs_std = obs_std
     # Expose a [-1, 1] action box regardless of the wrapped env's bounds.
     unit = np.ones(self._wrapped_env.action_space.shape)
     self.action_space = Box(-unit, unit)
Exemple #2
0
    def __init__(
        self,
        env,
        policy,
        exploration_policy,
        max_path_length,
        train_rollout_function,
        eval_rollout_function,
        num_workers=2,
    ):
        """Spawn ``num_workers`` rollout worker processes wired up via pipes.

        :param env: environment each worker rolls out in.
        :param policy: evaluation policy.
        :param exploration_policy: policy used for training rollouts.
        :param max_path_length: rollout horizon.
        :param train_rollout_function: rollout fn for training (cloudpickled).
        :param eval_rollout_function: rollout fn for eval (cloudpickled).
        :param num_workers: total worker processes to launch.
        """
        Serializable.quick_init(self, locals())
        super().__init__(env)
        self.num_workers = num_workers
        # worker_limits[True] caps training workers; worker_limits[False]
        # caps eval workers. Each side may use up to half, rounded up.
        half = math.ceil(self.num_workers / 2)
        self.worker_limits = {
            True: half,
            False: half,
        }

        # One duplex Pipe per worker: the parent end stays in this process,
        # the child end is handed to the worker.
        pipe_pairs = [Pipe() for _ in range(num_workers)]
        self.parent_pipes = [parent for parent, _ in pipe_pairs]
        self.child_pipes = [child for _, child in pipe_pairs]

        # Rollout functions are cloudpickled so they survive the trip
        # across the process boundary.
        self._workers = [
            Process(
                target=RemoteRolloutEnv._worker_loop,
                args=(
                    child_conn,
                    env,
                    policy,
                    exploration_policy,
                    max_path_length,
                    cloudpickle.dumps(train_rollout_function),
                    cloudpickle.dumps(eval_rollout_function),
                ),
            )
            for child_conn in self.child_pipes
        ]

        for worker in self._workers:
            worker.start()

        self.free_pipes = set(self.parent_pipes)
        # pipe_info[pipe] stores (epoch, train_type) for in-flight rollouts.
        self.pipe_info = {}
        # rollout_promise_list[True] holds training promises,
        # rollout_promise_list[False] holds eval promises.
        self.rollout_promise_list = {
            True: [],
            False: [],
        }
Exemple #3
0
 def __init__(self,
              action_space,
              max_sigma=1.0,
              min_sigma=None,
              decay_period=1000000):
     """Gaussian exploration whose sigma anneals from max to min.

     :param action_space: flat (1-D) action space.
     :param max_sigma: initial noise scale.
     :param min_sigma: final noise scale; defaults to ``max_sigma``
         (i.e. no annealing) when omitted.
     :param decay_period: number of steps over which sigma decays.
     """
     # Only flat action spaces are supported.
     assert len(action_space.shape) == 1
     Serializable.quick_init(self, locals())
     self._max_sigma = max_sigma
     # A missing min_sigma means "keep sigma constant".
     self._min_sigma = max_sigma if min_sigma is None else min_sigma
     self._decay_period = decay_period
     self._action_space = action_space
Exemple #4
0
 def __init__(self, env):
     """Wrap ``env``, converting its spaces to TF-compatible equivalents.

     :param env: environment whose action/observation spaces are converted
         via ``convert_space_to_tf_space``.
     """
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self.action_space = convert_space_to_tf_space(
         self._wrapped_env.action_space
     )
     self.observation_space = convert_space_to_tf_space(
         self._wrapped_env.observation_space
     )
     # Imported lazily, presumably to avoid a hard rllab dependency at
     # module import time — TODO confirm.
     from rllab.envs.env_spec import EnvSpec
     # Publish an EnvSpec built from the converted spaces.
     self.spec = EnvSpec(
         observation_space=self.observation_space,
         action_space=self.action_space,
     )
Exemple #5
0
    def save_init_params(self, locals):
        """Record constructor arguments for later serialization or cloning.

        Should call this FIRST THING in the __init__ method if you ever want
        to serialize or clone this network.

        Usage:
        ```
        def __init__(self, ...):
            self.save_init_params(locals())
            ...
        ```
        :param locals: the ``locals()`` dict captured inside ``__init__``.
            (Note: the parameter name shadows the ``locals`` builtin, but is
            kept for backward compatibility with existing callers.)
        :return: None
        """
        # Fixed: the usage example previously referenced a non-existent
        # ``init_serialization`` method instead of ``save_init_params``.
        Serializable.quick_init(self, locals)
Exemple #6
0
    def copy(self, copy_parameters=True):
        """Return a duplicate of this network.

        :param copy_parameters: when True, parameter values are copied as
            well; when False only the serialized constructor state is
            duplicated.
        """
        if copy_parameters:
            duplicate = Serializable.clone(self)
            # clone() should already have copied the parameters; this
            # explicit copy is just to be safe.
            ptu.copy_model_params_from_to(self, duplicate)
            return duplicate
        # Same idea as clone(), but without restoring parameter values.
        assert isinstance(self, Serializable)
        state = Serializable.__getstate__(self)
        # Shallow-copy the kwargs dict so the duplicate does not alias ours.
        state["__kwargs"] = dict(state["__kwargs"])
        blank = type(self).__new__(type(self))
        Serializable.__setstate__(blank, state)
        return blank
Exemple #7
0
 def __getstate__(self):
     """Serialize, pinning the normalization stats and reward scale."""
     state = Serializable.__getstate__(self)
     # Stored explicitly because they may have changed since __init__.
     state.update(
         _obs_mean=self._obs_mean,
         _obs_std=self._obs_std,
         _reward_scale=self._reward_scale,
     )
     return state
Exemple #8
0
 def __setstate__(self, d):
     """Restore serialized state, then reload network parameter values.

     :param d: state dict produced by the matching ``__getstate__``.
     """
     Serializable.__setstate__(self, d)
     # Parameter values were stashed under "params" by __getstate__.
     self.set_param_values(d["params"])
Exemple #9
0
 def __getstate__(self):
     """Serialize, bundling current parameter values under ``"params"``."""
     state = Serializable.__getstate__(self)
     # Captured here so __setstate__ can restore the live parameters.
     state["params"] = self.get_param_values()
     return state
Exemple #10
0
 def __setstate__(self, d):
     """Restore serialized state, including the normalization stats.

     :param d: state dict produced by the matching ``__getstate__``.
     """
     Serializable.__setstate__(self, d)
     # These were serialized explicitly by the matching __getstate__.
     for attr in ("_obs_mean", "_obs_std", "_reward_scale"):
         setattr(self, attr, d[attr])
Exemple #11
0
 def __init__(self, wrapped_env):
     """Delegate to ``wrapped_env``, mirroring its spaces.

     :param wrapped_env: environment to proxy.
     """
     Serializable.quick_init(self, locals())
     self._wrapped_env = wrapped_env
     self.action_space = wrapped_env.action_space
     self.observation_space = wrapped_env.observation_space
Exemple #12
0
 def __init__(self, action_space, prob_random_action=0.1):
     """Strategy that takes a random action with a fixed probability.

     :param action_space: space random actions are drawn from.
     :param prob_random_action: probability of acting randomly.
     """
     # Fixed: quick_init was previously called twice in a row; recording
     # the constructor arguments once is sufficient.
     Serializable.quick_init(self, locals())
     self.prob_random_action = prob_random_action
     self.action_space = action_space
Exemple #13
0
 def __init__(self, action_space, low=0., high=1.):
     """Store the action space's bounds for later clipping/scaling.

     NOTE(review): the ``low`` and ``high`` parameters are accepted but
     never used — the bounds are read from ``action_space`` instead.
     Confirm whether the parameters were meant to override the space's
     bounds; kept as-is to avoid changing caller-visible behavior.
     """
     Serializable.quick_init(self, locals())
     self._low = action_space.low
     self._high = action_space.high
 def __init__(self, action_space, sigma=1.0):
     """Gaussian exploration noise with a fixed standard deviation.

     :param action_space: flat (1-D) action space.
     :param sigma: standard deviation of the noise.
     """
     # Only flat action spaces are supported.
     assert len(action_space.shape) == 1
     Serializable.quick_init(self, locals())
     super().__init__()
     self._action_space = action_space
     self._sigma = sigma