Example #1
0
    def __init__(
        self,
        name,
        env,
        dynamics_model,
        reward_model=None,
        discount=1,
        use_cem=False,
        n_candidates=1024,
        horizon=10,
        use_reward_model=False,
        num_rollouts=10,
        context=False,
        mcl_cadm=False,
    ):
        """MPC controller that plans actions using a learned dynamics model.

        Args:
            name: Identifier for this controller (captured by ``locals()``
                for serialization; not otherwise used in this constructor).
            env: Environment, possibly wrapped; its innermost (unwrapped)
                instance must expose a ``reward`` function.
            dynamics_model: Learned dynamics model used for planning.
            reward_model: Optional learned reward model; presumably consulted
                when ``use_reward_model`` is True — confirm in the planner.
            discount: Discount factor applied to rewards over the horizon.
            use_cem: If True, plan with CEM instead of random shooting.
            n_candidates: Number of candidate action sequences per step.
            horizon: Planning horizon in model steps.
            use_reward_model: Whether rollouts are scored with ``reward_model``.
            num_rollouts: Number of rollouts (captured for serialization;
                unused directly here).
            context: Whether the dynamics model is context-conditioned.
            mcl_cadm: Whether the MCL-CaDM variant is in use.

        Raises:
            ValueError: If the unwrapped environment has no ``reward``
                attribute.
        """
        self.dynamics_model = dynamics_model
        self.reward_model = reward_model
        self.discount = discount
        self.n_candidates = n_candidates
        self.horizon = horizon
        self.use_cem = use_cem
        self.env = env
        self.use_reward_model = use_reward_model
        self.context = context
        self.mcl_cadm = mcl_cadm

        # Peel off wrapper layers to reach the innermost environment; that is
        # the instance expected to implement the reward function.
        self.unwrapped_env = env
        while hasattr(self.unwrapped_env, "wrapped_env"):
            self.unwrapped_env = self.unwrapped_env.wrapped_env

        # Explicit raise instead of ``assert`` so the check still runs when
        # Python is started with -O (assertions are stripped).
        if not hasattr(self.unwrapped_env, "reward"):
            raise ValueError("env must have a reward function")

        Serializable.quick_init(self, locals())
        super(MPCController, self).__init__(env=env)
Example #2
0
    def __init__(
        self,
        env,
        scale_reward=1.0,
        normalize_obs=False,
        normalize_reward=False,
        obs_alpha=0.001,
        reward_alpha=0.001,
        normalization_scale=1.0,
        dummy_flag=False,
    ):
        """Wrap ``env`` with optional observation/reward normalization.

        Args:
            env: The environment to wrap.
            scale_reward: Multiplicative factor applied to rewards.
            normalize_obs: Whether to normalize observations with running
                mean/variance statistics.
            normalize_reward: Whether to normalize rewards with running
                mean/variance statistics.
            obs_alpha: EMA rate for the running observation statistics.
            reward_alpha: EMA rate for the running reward statistics.
            normalization_scale: Extra scale applied during normalization
                (consumed elsewhere in this class).
            dummy_flag: Stored as-is; semantics defined by callers.
        """
        Serializable.quick_init(self, locals())

        # Bug fix: this was hard-coded to 1, silently ignoring the
        # ``scale_reward`` argument for every caller that set it.
        self._scale_reward = scale_reward
        self._wrapped_env = env

        self._normalize_obs = normalize_obs
        self._normalize_reward = normalize_reward
        self._obs_alpha = obs_alpha
        # Running statistics for observation normalization, initialized to
        # the identity transform (mean 0, variance 1).
        self._obs_mean = np.zeros(self.observation_space.shape)
        self._obs_var = np.ones(self.observation_space.shape)
        self._reward_alpha = reward_alpha
        self._reward_mean = 0.0
        self._reward_var = 1.0
        self._normalization_scale = normalization_scale
        self._dummy_flag = dummy_flag
Example #3
0
 def __init__(self, observation_space, action_space):
     """Record the spaces describing valid observations and actions.

     :type observation_space: Space
     :type action_space: Space
     """
     # Capture constructor arguments for serialization support.
     Serializable.quick_init(self, locals())
     self._observation_space, self._action_space = (
         observation_space,
         action_space,
     )
Example #4
0
 def __init__(self, *args, **kwargs):
     """Initialize the layer, forwarding all arguments to ``Layer.__init__``,
     then build its graph.
     """
     # store the init args for serialization and call the super constructors
     Serializable.quick_init(self, locals())
     # NOTE(review): Layer.__init__ is invoked directly rather than via
     # super(), so any cooperative MRO beyond these two bases is bypassed.
     Layer.__init__(self, *args, **kwargs)
     # build_graph() is defined elsewhere (Layer or a subclass); presumably
     # it constructs the computation graph — confirm against Layer's contract.
     self.build_graph()
Example #5
0
 def __init__(self, env):
     """Keep only the innermost environment, stripping all wrapper layers."""
     Serializable.quick_init(self, locals())
     innermost = env
     # Descend through the chain of wrappers until no further layer exists.
     while hasattr(innermost, "wrapped_env"):
         innermost = innermost.wrapped_env
     self.env = innermost
Example #6
0
 def __setstate__(self, d):
     """Restore pickled state, including the running observation statistics."""
     Serializable.__setstate__(self, d)
     # Reinstate the extra fields that __getstate__ adds on top of the base.
     for attr in ("_obs_mean", "_obs_var"):
         setattr(self, attr, d[attr])
Example #7
0
 def __getstate__(self):
     """Extend the base serialized state with the running observation stats."""
     state = Serializable.__getstate__(self)
     # These fields are consumed by the matching __setstate__.
     state.update({"_obs_mean": self._obs_mean, "_obs_var": self._obs_var})
     return state