def __init__(
    self,
    name,
    env,
    dynamics_model,
    reward_model=None,
    discount=1,
    use_cem=False,
    n_candidates=1024,
    horizon=10,
    use_reward_model=False,
    num_rollouts=10,
    context=False,
    mcl_cadm=False,
):
    self.dynamics_model = dynamics_model
    self.reward_model = reward_model
    self.discount = discount
    self.n_candidates = n_candidates  # candidate action sequences per planning step
    self.horizon = horizon  # planning horizon in environment steps
    self.use_cem = use_cem  # CEM planning if True, random shooting otherwise
    self.env = env  # e.g. OurHalfCheetahEnv
    self.use_reward_model = use_reward_model
    self.context = context
    self.mcl_cadm = mcl_cadm

    # unwrap nested environment wrappers to reach the base environment
    self.unwrapped_env = env
    while hasattr(self.unwrapped_env, "wrapped_env"):
        self.unwrapped_env = self.unwrapped_env.wrapped_env

    # make sure that the base env exposes a reward function
    assert hasattr(self.unwrapped_env, "reward"), "env must have a reward function"

    Serializable.quick_init(self, locals())
    super(MPCController, self).__init__(env=env)
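# The attributes above feed a sampling-based planner. Below is a minimal,
# self-contained sketch of random-shooting MPC -- an illustration of the technique,
# not this repository's implementation: sample n_candidates action sequences, roll
# them through a learned dynamics model, score them with the env's reward function,
# and execute the first action of the best sequence. `dynamics_fn` and `reward_fn`
# are hypothetical stand-ins for the dynamics model's prediction method and the
# unwrapped env's reward function.
import numpy as np

def random_shooting_action(obs, dynamics_fn, reward_fn, act_dim,
                           horizon=10, n_candidates=1024, discount=1.0):
    # candidate action sequences: (n_candidates, horizon, act_dim), uniform in [-1, 1]
    actions = np.random.uniform(-1.0, 1.0, size=(n_candidates, horizon, act_dim))
    obs_batch = np.tile(obs, (n_candidates, 1))
    returns = np.zeros(n_candidates)
    for t in range(horizon):
        next_obs = dynamics_fn(obs_batch, actions[:, t])  # predicted next states
        returns += (discount ** t) * reward_fn(obs_batch, actions[:, t], next_obs)
        obs_batch = next_obs
    return actions[np.argmax(returns), 0]  # first action of the best sequence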
def __init__(
    self,
    env,
    scale_reward=1.0,
    normalize_obs=False,
    normalize_reward=False,
    obs_alpha=0.001,
    reward_alpha=0.001,
    normalization_scale=1.0,
    dummy_flag=False,
):
    Serializable.quick_init(self, locals())
    self._scale_reward = scale_reward
    self._wrapped_env = env
    self._normalize_obs = normalize_obs
    self._normalize_reward = normalize_reward
    self._obs_alpha = obs_alpha
    # running estimates of the observation mean and variance
    self._obs_mean = np.zeros(self.observation_space.shape)
    self._obs_var = np.ones(self.observation_space.shape)
    self._reward_alpha = reward_alpha
    # running estimates of the reward mean and variance
    self._reward_mean = 0.0
    self._reward_var = 1.0
    self._normalization_scale = normalization_scale
    self._dummy_flag = dummy_flag
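# _obs_mean and _obs_var are running statistics. rllab-style normalization wrappers
# typically update them with an exponential moving average controlled by obs_alpha;
# the sketch below illustrates that convention under this assumption and is not
# necessarily the exact code used elsewhere in this class.
import numpy as np

def update_running_obs_stats(obs, obs_mean, obs_var, obs_alpha=0.001):
    obs_mean = (1.0 - obs_alpha) * obs_mean + obs_alpha * obs
    obs_var = (1.0 - obs_alpha) * obs_var + obs_alpha * np.square(obs - obs_mean)
    return obs_mean, obs_var

def normalize_obs(obs, obs_mean, obs_var):
    # scale to roughly zero mean / unit variance, guarding against tiny variances
    return (obs - obs_mean) / (np.sqrt(obs_var) + 1e-8)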
def __init__(self, observation_space, action_space):
    """
    :type observation_space: Space
    :type action_space: Space
    """
    Serializable.quick_init(self, locals())
    self._observation_space = observation_space
    self._action_space = action_space
def __init__(self, *args, **kwargs):
    # store the init args for serialization and call the super constructors
    Serializable.quick_init(self, locals())
    Layer.__init__(self, *args, **kwargs)
    self.build_graph()
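# Calling self.build_graph() from __init__ is a template-method pattern: the base
# constructor drives graph construction and each concrete layer fills in
# build_graph(). A self-contained illustration of the same pattern with placeholder
# classes, unrelated to this repository's Layer:
class BaseLayer:
    def __init__(self, name):
        self.name = name
        self.build_graph()  # subclass hook invoked at construction time

    def build_graph(self):
        raise NotImplementedError

class DenseLayer(BaseLayer):
    def build_graph(self):
        # a concrete layer would create its variables / ops here
        self.built = True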
def __init__(self, env):
    Serializable.quick_init(self, locals())
    # unwrap nested environment wrappers to reach the base environment
    self.env = env
    while hasattr(self.env, "wrapped_env"):
        self.env = self.env.wrapped_env
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    # restore the running observation statistics
    self._obs_mean = d["_obs_mean"]
    self._obs_var = d["_obs_var"]
def __getstate__(self):
    d = Serializable.__getstate__(self)
    # persist the running observation statistics alongside the init args
    d["_obs_mean"] = self._obs_mean
    d["_obs_var"] = self._obs_var
    return d
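# __getstate__/__setstate__ extend the Serializable state with the running
# normalization statistics so that pickling a trained wrapper preserves them.
# A self-contained sketch of the same idiom, without the Serializable base class
# (class and attribute names here are illustrative only):
import pickle
import numpy as np

class RunningStats:
    def __init__(self, dim):
        self._obs_mean = np.zeros(dim)
        self._obs_var = np.ones(dim)

    def __getstate__(self):
        return {"_obs_mean": self._obs_mean, "_obs_var": self._obs_var}

    def __setstate__(self, d):
        self._obs_mean = d["_obs_mean"]
        self._obs_var = d["_obs_var"]

stats = RunningStats(3)
stats._obs_mean += 1.0
restored = pickle.loads(pickle.dumps(stats))  # round-trip keeps the statistics
assert np.allclose(restored._obs_mean, stats._obs_mean)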