def __init__(
        self,
        env,
        reward_scale=1.,
        obs_mean=None,
        obs_std=None,
):
    # self._wrapped_env needs to be set first because
    # Serializable.quick_init calls getattr on this class, and the
    # implementation of __getattr__ (see below) reads self._wrapped_env.
    # Without setting it first, the call to self._wrapped_env would
    # trigger __getattr__ again (since the attribute is not set yet) and
    # therefore loop forever.
    self._wrapped_env = env
    # Otherwise serialization gets delegated to the wrapped_env.
    # Serialize this env separately from the wrapped_env.
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)
    self._should_normalize = not (obs_mean is None and obs_std is None)
    if self._should_normalize:
        if obs_mean is None:
            obs_mean = np.zeros_like(env.observation_space.low)
        else:
            obs_mean = np.array(obs_mean)
        if obs_std is None:
            obs_std = np.ones_like(env.observation_space.low)
        else:
            obs_std = np.array(obs_std)
    self._reward_scale = reward_scale
    self._obs_mean = obs_mean
    self._obs_std = obs_std
    ub = np.ones(self._wrapped_env.action_space.shape)
    self.action_space = Box(-1 * ub, ub)
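# The comment in __init__ above refers to a __getattr__ "below" that
# delegates attribute lookups to the wrapped env. A minimal sketch of
# such delegation (an assumption; this repo's actual implementation may
# differ, e.g. in how it guards against the recursion):
def __getattr__(self, attr):
    if attr == '_wrapped_env':
        # Avoid infinite recursion while _wrapped_env is not yet set.
        raise AttributeError()
    return getattr(self._wrapped_env, attr)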
def __init__(
        self,
        env,
        policy,
        exploration_policy,
        max_path_length,
        train_rollout_function,
        eval_rollout_function,
        num_workers=2,
):
    Serializable.quick_init(self, locals())
    super().__init__(env)
    self.num_workers = num_workers
    # self.worker_limits[True] is the max number of workers for training;
    # self.worker_limits[False] is the max number of workers for eval.
    self.worker_limits = {
        True: math.ceil(self.num_workers / 2),
        False: math.ceil(self.num_workers / 2),
    }
    self.parent_pipes = []
    self.child_pipes = []
    for _ in range(num_workers):
        parent_conn, child_conn = Pipe()
        self.parent_pipes.append(parent_conn)
        self.child_pipes.append(child_conn)
    self._workers = [
        Process(
            target=RemoteRolloutEnv._worker_loop,
            args=(
                self.child_pipes[i],
                env,
                policy,
                exploration_policy,
                max_path_length,
                cloudpickle.dumps(train_rollout_function),
                cloudpickle.dumps(eval_rollout_function),
            ),
        )
        for i in range(num_workers)
    ]
    for worker in self._workers:
        worker.start()
    self.free_pipes = set(self.parent_pipes)
    # self.pipe_info[pipe] stores (epoch, train_type).
    self.pipe_info = {}
    # self.rollout_promise_list[True] holds the pending rollout promises
    # for training; self.rollout_promise_list[False] holds those for eval.
    self.rollout_promise_list = {
        True: [],
        False: [],
    }
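# A hypothetical sketch of the _worker_loop referenced above (the message
# protocol over the pipe is an assumption, not this repo's actual one):
# each worker unpickles its rollout functions once, then serves rollout
# requests from its end of the pipe until told to shut down.
@staticmethod
def _worker_loop(pipe, env, policy, exploration_policy, max_path_length,
                 train_rollout_function_pkl, eval_rollout_function_pkl):
    rollout_fns = {
        True: cloudpickle.loads(train_rollout_function_pkl),
        False: cloudpickle.loads(eval_rollout_function_pkl),
    }
    while True:
        # Assumed message format: True/False selects train vs. eval
        # rollouts; None signals shutdown.
        train_type = pipe.recv()
        if train_type is None:
            break
        used_policy = exploration_policy if train_type else policy
        path = rollout_fns[train_type](env, used_policy, max_path_length)
        pipe.send(path)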
def __init__(self, action_space, max_sigma=1.0, min_sigma=None,
             decay_period=1000000):
    assert len(action_space.shape) == 1
    Serializable.quick_init(self, locals())
    self._max_sigma = max_sigma
    if min_sigma is None:
        min_sigma = max_sigma
    self._min_sigma = min_sigma
    self._decay_period = decay_period
    self._action_space = action_space
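# The max/min sigma and decay period stored above suggest linearly
# annealed Gaussian exploration noise. A minimal sketch of action
# selection under that assumption (the method name and clipping are
# guesses, not necessarily this repo's implementation):
def get_action(self, t, observation, policy, **kwargs):
    action, agent_info = policy.get_action(observation)
    # Interpolate sigma from max_sigma down to min_sigma over
    # decay_period steps, then hold it constant.
    sigma = self._max_sigma - (self._max_sigma - self._min_sigma) * min(
        1.0, t * 1.0 / self._decay_period
    )
    noisy_action = np.clip(
        action + np.random.normal(size=len(action)) * sigma,
        self._action_space.low,
        self._action_space.high,
    )
    return noisy_action, agent_info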
def __init__(self, env):
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)
    self.action_space = convert_space_to_tf_space(
        self._wrapped_env.action_space
    )
    self.observation_space = convert_space_to_tf_space(
        self._wrapped_env.observation_space
    )
    # Imported lazily so rllab is only required when this wrapper is used.
    from rllab.envs.env_spec import EnvSpec
    self.spec = EnvSpec(
        observation_space=self.observation_space,
        action_space=self.action_space,
    )
def save_init_params(self, locals):
    """
    Call this FIRST THING in the __init__ method if you ever want
    to serialize or clone this network.

    Usage:
    ```
    def __init__(self, ...):
        self.save_init_params(locals())
        ...
    ```
    :param locals: The `locals()` dict of the calling __init__.
    :return: None
    """
    Serializable.quick_init(self, locals)
def copy(self, copy_parameters=True):
    if not copy_parameters:
        # Basically the same code as clone, but do not set param values.
        assert isinstance(self, Serializable)
        d = Serializable.__getstate__(self)
        d["__kwargs"] = dict(d["__kwargs"])
        out = type(self).__new__(type(self))
        Serializable.__setstate__(out, d)
        return out
    copy = Serializable.clone(self)
    # Not actually necessary since the parameters should already be
    # copied, but just to be safe...
    ptu.copy_model_params_from_to(self, copy)
    return copy
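# Example use of copy() on a Serializable network (the variable names are
# hypothetical; any network using save_init_params would work the same):
#
#   target_qf = qf.copy()                      # same weights
#   fresh_qf = qf.copy(copy_parameters=False)  # same architecture and
#                                              # constructor args, freshly
#                                              # initialized parameters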
def __getstate__(self):
    d = Serializable.__getstate__(self)
    # Add these explicitly in case they were modified after __init__.
    d["_obs_mean"] = self._obs_mean
    d["_obs_std"] = self._obs_std
    d["_reward_scale"] = self._reward_scale
    return d
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    self.set_param_values(d["params"])
def __getstate__(self):
    d = Serializable.__getstate__(self)
    d["params"] = self.get_param_values()
    return d
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    self._obs_mean = d["_obs_mean"]
    self._obs_std = d["_obs_std"]
    self._reward_scale = d["_reward_scale"]
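# The __getstate__/__setstate__ pairs above let the normalization stats
# (and network parameters) survive pickling. A minimal round-trip check;
# the class name NormalizedBoxEnv and the gym env id are assumptions used
# only for illustration:
#
#   import pickle
#   import gym
#
#   env = NormalizedBoxEnv(gym.make('Pendulum-v0'), reward_scale=10.)
#   restored = pickle.loads(pickle.dumps(env))
#   assert restored._reward_scale == env._reward_scale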
def __init__(self, wrapped_env):
    Serializable.quick_init(self, locals())
    self._wrapped_env = wrapped_env
    self.action_space = self._wrapped_env.action_space
    self.observation_space = self._wrapped_env.observation_space
def __init__(self, action_space, prob_random_action=0.1):
    Serializable.quick_init(self, locals())
    self.prob_random_action = prob_random_action
    self.action_space = action_space
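# A sketch of the epsilon-greedy action selection this constructor sets
# up (assumes `import random`; the method name follows the
# get_action_from_raw_action convention used by similar strategies, but
# is an assumption here):
def get_action_from_raw_action(self, action, **kwargs):
    # With probability prob_random_action, ignore the policy's action
    # and sample uniformly from the action space instead.
    if random.random() <= self.prob_random_action:
        return self.action_space.sample()
    return action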
def __init__(self, action_space, low=0., high=1.):
    Serializable.quick_init(self, locals())
    # Note: the low/high arguments are currently unused; the bounds are
    # taken from the action space itself.
    self._low = action_space.low
    self._high = action_space.high
def __init__(self, action_space, sigma=1.0):
    assert len(action_space.shape) == 1
    Serializable.quick_init(self, locals())
    super().__init__()
    self._sigma = sigma
    self._action_space = action_space