def sample(self, env_states: States = None, batch_size: int = 1, model_states: States = None, **kwargs) -> States:
    """Sample one randomly-chosen discrete sub-action per walker.

    For each walker a target action name is drawn at random; the bits of the
    action vector belonging to that name are cleared and exactly one valid
    index inside the name's mask is switched on. The force mask and soft
    constraints are then re-applied and the result stored in ``model_states``.

    Args:
        env_states: States whose ``observs`` seed the action template. When
            ``None``, a zero template of shape ``(batch_size, self.dim)`` is
            used instead.
        batch_size: Number of walkers when ``env_states`` is ``None``.
        model_states: States updated in place with ``actions`` and ``dt``.
        **kwargs: Forwarded to ``self.dt_sampler.calculate`` when a dt
            sampler is configured.

    Returns:
        The updated ``model_states``.
    """
    # Seed the boolean action template from the observations (or zeros).
    actions = env_states.observs.copy() if env_states is not None else np.zeros((batch_size, self.dim))
    actions = actions.astype(bool)
    for i in range(actions.shape[0]):
        target_name = self.random_state.choice(self.names)
        # Clear every bit that belongs to the chosen name.
        # BUG FIX: the original read ``self.namme_masks`` (typo) here, which
        # would raise AttributeError; the attribute used two statements below
        # is ``self.name_masks``.
        actions[i] = np.logical_and(actions[i], np.logical_not(self.name_masks[target_name]))
        # Then switch on exactly one valid index within that name's mask.
        valid_ixs = np.arange(actions.shape[1])[self.name_masks[target_name]]
        actions[i, self.random_state.choice(valid_ixs)] = True
    actions = actions.astype(int)
    # Time step: constant 1 unless a dt sampler is configured.
    dt = (
        1
        if self.dt_sampler is None
        else self.dt_sampler.calculate(batch_size=batch_size, model_states=model_states, **kwargs).astype(int)
    )
    model_states.update(actions=actions, dt=dt)
    # Re-apply the hard force mask and the soft constraints on top of the
    # freshly sampled actions before storing the final result.
    actions = model_states.actions.astype(bool)
    actions = np.logical_or(actions, self.force_mask)
    actions = self._enforce_soft_constraints(actions)
    model_states.update(actions=actions)
    return model_states
def classic_control_env():
    """Create a wrapped classic-control environment plus a matching States batch.

    Returns:
        Tuple of (``DiscreteEnv``-wrapped environment, ``States`` holding
        ``actions`` and ``dt`` vectors of ones, one entry per walker).
    """
    plangym_env = ClassicControl()
    plangym_env.reset()
    wrapped_env = DiscreteEnv(plangym_env)
    state_dict = {
        "actions": {"dtype": np.int64},
        "dt": {"dtype": np.float32},
    }
    states = States(state_dict=state_dict, batch_size=N_WALKERS)
    states.update(actions=np.ones(N_WALKERS), dt=np.ones(N_WALKERS))
    return wrapped_env, states
def sample(self, env_states: States = None, batch_size: int = 1, model_states: States = None, **kwargs) -> States:
    """Derive actions from the current observations and store them.

    Args:
        env_states: States whose ``observs`` seed the actions. When ``None``,
            a zero array of shape ``(batch_size, self.dim)`` is used.
        batch_size: Number of walkers when ``env_states`` is ``None``.
        model_states: States updated in place with the modified actions.
        **kwargs: Ignored; accepted for interface compatibility.

    Returns:
        The updated ``model_states``.
    """
    if env_states is None:
        raw_actions = np.zeros((batch_size, self.dim))
    else:
        raw_actions = env_states.observs.copy()
    model_states.update(actions=self.modify_actions(raw_actions))
    return model_states
def atari_env():
    """Create a wrapped MsPacman Atari environment plus a matching States batch.

    Returns:
        Tuple of (``DiscreteEnv``-wrapped environment, ``States`` holding
        ``actions`` and ``critic`` vectors of ones, one entry per walker).
    """
    plangym_env = AtariEnvironment(name="MsPacman-v0", clone_seeds=True, autoreset=True)
    plangym_env.reset()
    wrapped_env = DiscreteEnv(plangym_env)
    state_dict = {
        "actions": {"dtype": np.int64},
        "critic": {"dtype": np.float32},
    }
    states = States(state_dict=state_dict, batch_size=N_WALKERS)
    states.update(actions=np.ones(N_WALKERS), critic=np.ones(N_WALKERS))
    return wrapped_env, states