Example #1
    def sample(self,
               env_states: States = None,
               batch_size: int = 1,
               model_states: States = None,
               **kwargs) -> States:
        # Start from the current observations, or from zeros if no env states are given.
        actions = (env_states.observs.copy()
                   if env_states is not None
                   else np.zeros((batch_size, self.dim)))
        actions = actions.astype(bool)
        # For each walker, clear every bit covered by a randomly chosen name mask,
        # then activate exactly one valid index inside that mask.
        for i in range(actions.shape[0]):
            target_name = self.random_state.choice(self.names)
            actions[i] = np.logical_and(
                actions[i], np.logical_not(self.name_masks[target_name]))
            valid_ixs = np.arange(
                actions.shape[1])[self.name_masks[target_name]]
            actions[i, self.random_state.choice(valid_ixs)] = True
        actions = actions.astype(int)
        # Use a constant time step of 1 unless a dt sampler is available.
        dt = (1 if self.dt_sampler is None
              else self.dt_sampler.calculate(
                  batch_size=batch_size, model_states=model_states,
                  **kwargs).astype(int))
        model_states.update(actions=actions, dt=dt)

        # Force the mandatory action bits on and re-apply the soft constraints.
        actions = model_states.actions.astype(bool)
        actions = np.logical_or(actions, self.force_mask)
        actions = self._enforce_soft_constraints(actions)
        model_states.update(actions=actions)
        return model_states
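The heart of this sampler is the masked one-hot update: for each walker it picks a name at random, clears every action bit covered by that name's mask, and switches on exactly one valid index inside the mask. A minimal numpy sketch of that logic, assuming a hypothetical two-group `name_masks` layout (the masks and sizes here are made up for illustration):

import numpy as np

rng = np.random.RandomState(0)
dim = 6
# Hypothetical boolean masks marking which columns belong to each name.
name_masks = {
    "a": np.array([1, 1, 1, 0, 0, 0], dtype=bool),
    "b": np.array([0, 0, 0, 1, 1, 1], dtype=bool),
}

actions = np.ones((2, dim), dtype=bool)  # stand-in for copied observations
for i in range(actions.shape[0]):
    name = rng.choice(sorted(name_masks))
    mask = name_masks[name]
    actions[i] = np.logical_and(actions[i], np.logical_not(mask))  # clear the group
    valid_ixs = np.arange(dim)[mask]
    actions[i, rng.choice(valid_ixs)] = True  # activate exactly one valid index
print(actions.astype(int))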
Example #2
 def classic_control_env():
     env = ClassicControl()
     env.reset()
     env = DiscreteEnv(env)
     params = {"actions": {"dtype": np.int64}, "dt": {"dtype": np.float32}}
     states = States(state_dict=params, batch_size=N_WALKERS)
     states.update(actions=np.ones(N_WALKERS), dt=np.ones(N_WALKERS))
     return env, states
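In these fixtures the `state_dict` describes one named array per attribute and `batch_size` adds the leading walker dimension. A rough, self-contained approximation of what such a container allocates (this mimic is an assumption for illustration, not the library's actual `States` implementation):

import numpy as np

N_WALKERS = 8
params = {"actions": {"dtype": np.int64}, "dt": {"dtype": np.float32}}

# One (batch_size, *size) array per entry; "size" defaults to a 1-D vector.
arrays = {
    name: np.zeros((N_WALKERS, *spec.get("size", ())), dtype=spec["dtype"])
    for name, spec in params.items()
}
arrays["actions"][:] = 1  # roughly what states.update(actions=np.ones(...)) does
print({name: (a.shape, a.dtype) for name, a in arrays.items()})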
Example #3
 def sample(self,
            env_states: States = None,
            batch_size: int = 1,
            model_states: States = None,
            **kwargs) -> States:
     actions = (env_states.observs.copy()
                if env_states is not None
                else np.zeros((batch_size, self.dim)))
     actions = self.modify_actions(actions)
     model_states.update(actions=actions)
     return model_states
Example #4
 def test_step(self, env):
     states = env.reset()
     actions = States(actions=np.ones((1, 2)) * 2,
                      batch_size=1,
                      dt=np.ones((1, 2)))
     new_states: StatesEnv = env.step(actions, states)
     assert isinstance(new_states, States)
     assert new_states.rewards[0].item() == 1
Example #5
 def atari_env():
     env = AtariEnvironment(name="MsPacman-v0",
                            clone_seeds=True,
                            autoreset=True)
     env.reset()
     env = DiscreteEnv(env)
     params = {
         "actions": {
             "dtype": np.int64
         },
         "critic": {
             "dtype": np.float32
         }
     }
     states = States(state_dict=params, batch_size=N_WALKERS)
     states.update(actions=np.ones(N_WALKERS), critic=np.ones(N_WALKERS))
     return env, states
Example #6
 def calculate(
     self,
     batch_size: int = None,
     model_states: StatesModel = None,
     env_states: StatesEnv = None,
     walkers_states: StatesWalkers = None,
 ) -> States:
     batch_size = batch_size or env_states.n
     return States(batch_size=batch_size,
                   critic_score=5 * numpy.ones(batch_size))
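This critic returns a constant score of 5 for every walker, falling back to `env_states.n` for the batch size when none is passed. The same shape contract in plain numpy (a standalone sketch; `n_from_env` is a made-up stand-in for `env_states.n`):

import numpy as np

def constant_critic_score(batch_size: int = None, n_from_env: int = None) -> np.ndarray:
    batch_size = batch_size or n_from_env  # same default as `batch_size or env_states.n`
    return 5 * np.ones(batch_size)

print(constant_critic_score(n_from_env=4))  # [5. 5. 5. 5.]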
Example #7
 def function_env():
     bounds = Bounds(shape=(2, ), high=1, low=1, dtype=int)
     env = Function(function=lambda x: np.ones(N_WALKERS), bounds=bounds)
     params = {
         "actions": {
             "dtype": np.int64,
             "size": (2, )
         },
         "dt": {
             "dtype": np.float32
         }
     }
     states = States(state_dict=params, batch_size=N_WALKERS)
     return env, states
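`Function` wraps a batched reward function defined over a box of bounds: it receives an array of points with one row per walker and must return one scalar per walker. A standalone sketch of that contract (the clipping step and names here are illustrative assumptions, not the library's internals):

import numpy as np

N_WALKERS = 8

def batched_reward(x: np.ndarray) -> np.ndarray:
    # Same contract as the lambda above: one scalar reward per walker row.
    return np.ones(x.shape[0])

low = np.ones(2, dtype=int)   # the (2,) bounds above use low=1, high=1
high = np.ones(2, dtype=int)
points = np.clip(np.zeros((N_WALKERS, 2)), low, high)  # keep walkers inside the box
print(batched_reward(points))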