Example #1
 def __init__(
         self,
         env,
         reward_scale=1.,
         obs_mean=None,
         obs_std=None,
 ):
      # self._wrapped_env needs to be set first because
      # Serializable.quick_init calls getattr on this class, and the
      # implementation of __getattr__ (see the sketch after Example #6)
      # delegates to self._wrapped_env. Without setting this first, the
      # lookup of self._wrapped_env would trigger __getattr__ again (since
      # the attribute is not set yet) and therefore recurse forever.
     self._wrapped_env = env
      # Reset this flag so that this env is serialized separately from the
      # wrapped_env; otherwise __getattr__ would delegate serialization to
      # the wrapped_env.
     self._serializable_initialized = False
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self._should_normalize = not (obs_mean is None and obs_std is None)
     if self._should_normalize:
         if obs_mean is None:
             obs_mean = np.zeros_like(env.observation_space.low)
         else:
             obs_mean = np.array(obs_mean)
         if obs_std is None:
             obs_std = np.ones_like(env.observation_space.low)
         else:
             obs_std = np.array(obs_std)
     self._reward_scale = reward_scale
     self._obs_mean = obs_mean
     self._obs_std = obs_std
     ub = np.ones(self._wrapped_env.action_space.shape)
     self.action_space = Box(-1 * ub, ub)
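The constructor only stores the normalization statistics and reward scale. A minimal sketch of how they would typically be consumed, modeled on rlkit's NormalizedBoxEnv (these method bodies are an assumption, not part of the example above):

 def _apply_normalize_obs(self, obs):
     # A small epsilon guards against division by zero.
     return (obs - self._obs_mean) / (self._obs_std + 1e-8)

 def step(self, action):
     # Actions arrive scaled to [-1, 1]; map them back to the wrapped
     # env's bounds before stepping.
     lb = self._wrapped_env.action_space.low
     ub = self._wrapped_env.action_space.high
     scaled_action = lb + (action + 1.) * 0.5 * (ub - lb)
     scaled_action = np.clip(scaled_action, lb, ub)
     next_obs, reward, done, info = self._wrapped_env.step(scaled_action)
     if self._should_normalize:
         next_obs = self._apply_normalize_obs(next_obs)
     return next_obs, reward * self._reward_scale, done, info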
Example #2
    def __init__(
        self,
        env,
        policy,
        exploration_policy,
        max_path_length,
        train_rollout_function,
        eval_rollout_function,
        num_workers=2,
    ):
        Serializable.quick_init(self, locals())
        super().__init__(env)
        self.num_workers = num_workers
        # Let self.worker_limits[True] be the max number of workers for training
        # and self.worker_limits[False] be the max number of workers for eval.
        self.worker_limits = {
            True: math.ceil(self.num_workers / 2),
            False: math.ceil(self.num_workers / 2),
        }

        self.parent_pipes = []
        self.child_pipes = []

        for _ in range(num_workers):
            parent_conn, child_conn = Pipe()
            self.parent_pipes.append(parent_conn)
            self.child_pipes.append(child_conn)

        self._workers = [
            Process(target=RemoteRolloutEnv._worker_loop,
                    args=(
                        self.child_pipes[i],
                        env,
                        policy,
                        exploration_policy,
                        max_path_length,
                        cloudpickle.dumps(train_rollout_function),
                        cloudpickle.dumps(eval_rollout_function),
                    )) for i in range(num_workers)
        ]

        for worker in self._workers:
            worker.start()

        self.free_pipes = set(self.parent_pipes)
        # self.pipe_info[pipe] stores (epoch, train_type)
        self.pipe_info = {}
        # Let self.rollout_promise_list[True] be the promises for training
        # and self.rollout_promise_list[False] be the promises for eval.
        self.rollout_promise_list = {
            True: [],
            False: [],
        }
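Each worker runs a loop on the other end of its pipe, with the rollout functions shipped over as cloudpickle bytes. A hedged sketch of that loop, assuming a simple (train_mode, epoch) request / (path, epoch) response protocol; the real RemoteRolloutEnv._worker_loop and its message format are not shown in the example:

    @staticmethod
    def _worker_loop(pipe, env, policy, exploration_policy, max_path_length,
                     train_rollout_function, eval_rollout_function):
        # Unpickle the rollout functions once per worker process.
        train_rollout_function = cloudpickle.loads(train_rollout_function)
        eval_rollout_function = cloudpickle.loads(eval_rollout_function)
        while True:
            message = pipe.recv()
            if message is None:  # assumed shutdown sentinel
                break
            train_mode, epoch = message
            rollout = train_rollout_function if train_mode else eval_rollout_function
            used_policy = exploration_policy if train_mode else policy
            path = rollout(env, used_policy, max_path_length)
            pipe.send((path, epoch))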
Example #3
 def __init__(self,
              action_space,
              max_sigma=1.0,
              min_sigma=None,
              decay_period=1000000):
     assert len(action_space.shape) == 1
     Serializable.quick_init(self, locals())
     self._max_sigma = max_sigma
     if min_sigma is None:
         min_sigma = max_sigma
     self._min_sigma = min_sigma
     self._decay_period = decay_period
     self._action_space = action_space
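These fields define a linear annealing schedule: sigma decays from max_sigma to min_sigma over decay_period steps. A sketch of how they are typically consumed, following rlkit's GaussianStrategy convention (the method name and signature are assumptions here):

 def get_action_from_raw_action(self, action, t=0, **kwargs):
     # Linearly interpolate sigma over the decay period, then add
     # Gaussian noise and clip to the action bounds.
     sigma = self._max_sigma - (self._max_sigma - self._min_sigma) * min(
         1.0, t * 1.0 / self._decay_period)
     return np.clip(
         action + np.random.normal(size=len(action)) * sigma,
         self._action_space.low,
         self._action_space.high,
     )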
Example #4
 def __init__(self, env):
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self.action_space = convert_space_to_tf_space(
         self._wrapped_env.action_space
     )
     self.observation_space = convert_space_to_tf_space(
         self._wrapped_env.observation_space
     )
     from rllab.envs.env_spec import EnvSpec
     self.spec = EnvSpec(
         observation_space=self.observation_space,
         action_space=self.action_space,
     )
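A hypothetical usage sketch: the point of building self.spec is that downstream rllab components consume an EnvSpec. The wrapper's class name (TfEnv below) and the concrete inner env are assumptions, since the example only shows __init__:

 from rllab.envs.box2d.cartpole_env import CartpoleEnv

 env = TfEnv(CartpoleEnv())  # TfEnv: assumed name of the class above
 # The converted spaces expose the usual rllab interface, e.g. flat_dim.
 print(env.spec.observation_space.flat_dim, env.spec.action_space.flat_dim)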
Example #5
    def save_init_params(self, locals):
        """
        Should call this FIRST THING in the __init__ method if you ever want
        to serialize or clone this network.

        Usage:
        ```
        def __init__(self, ...):
            self.save_init_params(locals())
            ...
        ```
        :param locals: the dictionary returned by calling locals() at the
            top of __init__
        :return: None
        """
        Serializable.quick_init(self, locals)
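A hedged sketch of the payoff, assuming the surrounding class (rlkit's PyTorchModule) mixes in Serializable so that pickling re-invokes __init__ with the captured arguments; MyMlp is a hypothetical subclass:

    import pickle

    class MyMlp(PyTorchModule):  # PyTorchModule: assumed base with save_init_params
        def __init__(self, hidden_size=64):
            self.save_init_params(locals())
            super().__init__()
            self.hidden_size = hidden_size

    net = MyMlp(hidden_size=128)
    # Serializable's __getstate__/__setstate__ rebuild the clone by
    # re-calling __init__(hidden_size=128).
    clone = pickle.loads(pickle.dumps(net))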
Example #6
 def __init__(self, wrapped_env):
     Serializable.quick_init(self, locals())
     self._wrapped_env = wrapped_env
     self.action_space = self._wrapped_env.action_space
     self.observation_space = self._wrapped_env.observation_space
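Example #1's comment about getattr refers to delegation like the following; this is a sketch of the usual ProxyEnv pattern (only __init__ appears above, so the body is an assumption):

 def __getattr__(self, attr):
     # Invoked only when normal attribute lookup fails, so anything not
     # defined on the wrapper is forwarded to the wrapped environment.
     if attr == '_wrapped_env':
         # Avoid infinite recursion while _wrapped_env is not yet set.
         raise AttributeError()
     return getattr(self._wrapped_env, attr)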
Example #7
 def __init__(self, action_space, prob_random_action=0.1):
      Serializable.quick_init(self, locals())
     self.prob_random_action = prob_random_action
     self.action_space = action_space
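A sketch of the epsilon-greedy rule these two fields implement, following the common get_action_from_raw_action strategy interface (the method name and signature are assumptions):

 import random

 def get_action_from_raw_action(self, action, **kwargs):
     # With probability prob_random_action, replace the policy's action
     # with a uniform sample from the action space.
     if random.random() <= self.prob_random_action:
         return self.action_space.sample()
     return action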
Example #8
 def __init__(self, action_space, low=0., high=1.):
      Serializable.quick_init(self, locals())
      # Note: the low/high keyword arguments are currently unused; the
      # bounds are always taken from the action space itself.
      self._low = action_space.low
      self._high = action_space.high
Example #9
 def __init__(self, action_space, sigma=1.0):
     assert len(action_space.shape) == 1
     Serializable.quick_init(self, locals())
     super().__init__()
     self._sigma = sigma
     self._action_space = action_space
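Unlike Example #3, sigma is fixed here. A sketch of the corresponding noise step, again assuming the get_action_from_raw_action interface:

 def get_action_from_raw_action(self, action, **kwargs):
     # Add fixed-scale Gaussian noise and clip to the action bounds.
     return np.clip(
         action + np.random.normal(size=len(action)) * self._sigma,
         self._action_space.low,
         self._action_space.high,
     )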