def step(self, model_states: StatesModel, env_states: StatesEnv) -> StatesEnv:
    """
    Step every worker environment in parallel on its own slice of the batch.

    The incoming states are split into ``self.n_workers`` chunks, each chunk
    is sent to one remote worker environment, and the stepped results are
    gathered and merged back into a single batch.

    Args:
        model_states: :class:`StatesModel` representing the data to be used \
                      to act on the environment.
        env_states: :class:`StatesEnv` representing the data to be set in \
                    the environment.

    Returns:
        :class:`StatesEnv` containing the information that describes the \
        new state of the Environment.
    """
    model_chunks = model_states.split_states(self.n_workers)
    env_chunks = env_states.split_states(self.n_workers)
    # Launch one asynchronous step per worker; each future resolves to the
    # StatesEnv produced by that worker's slice of the batch.
    futures = [
        worker.step.remote(model_states=mc, env_states=ec)
        for worker, mc, ec in zip(self.envs, model_chunks, env_chunks)
    ]
    # ray.get blocks until all remote steps have finished.
    stepped_states = ray.get(futures)
    merged_states: StatesEnv = StatesEnv.merge_states(stepped_states)
    return merged_states
def step(self, model_states: StatesModel, env_states: StatesEnv) -> StatesEnv:
    """
    Forward a batch of actions to the wrapped environments.

    Delegates the per-environment transitions to ``self._make_transitions``
    and merges the resulting per-worker states into one batch.

    Args:
        model_states: States representing the data to be used to act on the environment.
        env_states: States representing the data to be set in the environment.

    Returns:
        Batch of observations, rewards, and done flags.
    """
    # One StatesEnv per wrapped environment, produced in parallel.
    transitioned = self._make_transitions(model_states=model_states, env_states=env_states)
    merged: StatesEnv = StatesEnv.merge_states(transitioned)
    return merged