Exemple #1
0
    def __init__(
        self,
        env,
        n_bins_obs=10,
        memory_size=100,
        state_preprocess_fn=None,
        state_preprocess_kwargs=None,
    ):
        """
        Wrap ``env`` and set up trajectory memory and visit counters.

        Parameters
        ----------
        env:
            Environment to wrap. Its action space must be ``spaces.Discrete``;
            its observation space must be ``spaces.Box`` when no
            ``state_preprocess_fn`` is supplied.
        n_bins_obs: int
            Forwarded as ``n_bins_obs`` to both ``DiscreteCounter`` instances.
        memory_size: int
            Passed to ``TrajectoryMemory``.
        state_preprocess_fn: callable or None
            Stored on the instance; ``identity`` is used when falsy.
        state_preprocess_kwargs: dict or None
            Stored on the instance; an empty dict is used when falsy.
            Presumably forwarded to ``state_preprocess_fn`` by other
            methods -- confirm against the rest of the class.
        """
        Wrapper.__init__(self, env)

        # Without a preprocessing function the raw observations are used,
        # so they are required to come from a Box space.
        if state_preprocess_fn is None:
            assert isinstance(env.observation_space, spaces.Box)
        assert isinstance(env.action_space, spaces.Discrete)

        self.state_preprocess_fn = state_preprocess_fn or identity
        self.state_preprocess_kwargs = state_preprocess_kwargs or {}

        self.memory = TrajectoryMemory(memory_size)
        self.total_visit_counter = DiscreteCounter(
            self.env.observation_space,
            self.env.action_space,
            n_bins_obs=n_bins_obs)
        self.episode_visit_counter = DiscreteCounter(
            self.env.observation_space,
            self.env.action_space,
            n_bins_obs=n_bins_obs)
        self.current_state = None
        self.current_step = 0
        # Legacy misspelled attribute, kept because code outside this method
        # may still read it.
        self.curret_step = 0
Exemple #2
0
 def __init__(self, env):
     """
     Wrap ``env`` and replace the observation space with a ``Box``.

     The wrapped environment's observation space must be ``Discrete``.
     The new space has one entry per discrete observation (shape
     ``(n,)``), bounds 0.0 and 1.0, and dtype ``np.uint32``.
     """
     Wrapper.__init__(self, env, wrap_spaces=True)

     discrete_space = self.env.observation_space
     assert isinstance(discrete_space, Discrete)

     box_shape = (discrete_space.n,)
     self.observation_space = Box(
         low=0.0,
         high=1.0,
         shape=box_shape,
         dtype=np.uint32,
     )
Exemple #3
0
 def __init__(self, env, horizon):
     """
     Wrap ``env``, store the horizon and start the step counter at zero.

     Parameters
     ----------
     env:
         Environment handed to ``Wrapper.__init__``.
     horizon: int
         Must be at least 1 (checked with an assertion).
     """
     Wrapper.__init__(self, env)

     self.horizon = horizon
     assert horizon >= 1

     self.current_step = 0
Exemple #4
0
    def __init__(self,
                 env,
                 uncertainty_estimator_fn,
                 uncertainty_estimator_kwargs=None,
                 bonus_scale_factor=1.0,
                 bonus_max=np.inf):
        """
        Wrap ``env`` and build the uncertainty estimator.

        Parameters
        ----------
        env:
            Environment to wrap; its observation and action spaces are
            passed to the estimator constructor.
        uncertainty_estimator_fn: callable or str
            Called as ``fn(observation_space, action_space, **kwargs)``.
            A string is first resolved through ``load``.
        uncertainty_estimator_kwargs: dict or None
            Extra keyword arguments for the estimator constructor; an empty
            dict is used when falsy.
        bonus_scale_factor: float
            Stored on the instance as ``bonus_scale_factor``.
        bonus_max: float
            Stored on the instance as ``bonus_max``.
        """
        Wrapper.__init__(self, env)

        self.bonus_scale_factor = bonus_scale_factor
        self.bonus_max = bonus_max

        estimator_kwargs = uncertainty_estimator_kwargs or {}

        # A string argument names the estimator and is resolved via load().
        estimator_factory = uncertainty_estimator_fn
        if isinstance(estimator_factory, str):
            estimator_factory = load(estimator_factory)

        self.uncertainty_estimator = estimator_factory(
            env.observation_space,
            env.action_space,
            **estimator_kwargs)
        self.previous_obs = None
Exemple #5
0
 def __init__(self, env):
     """Initialize the wrapper by delegating to ``Wrapper.__init__`` with ``env``."""
     Wrapper.__init__(self, env)
Exemple #6
0
 def __init__(self, env, reward_range):
     """
     Wrap ``env`` and store the reward range.

     Parameters
     ----------
     env:
         Environment handed to ``Wrapper.__init__``.
     reward_range:
         Indexable pair ``(low, high)``; assertions require
         ``low < high`` and both bounds to be finite.
     """
     Wrapper.__init__(self, env)
     self.reward_range = reward_range

     lower = reward_range[0]
     upper = reward_range[1]
     assert lower < upper
     assert lower > -np.inf and upper < np.inf