def step_wait(self) -> VecEnvStepReturn:
    """Step every sub-environment with its stored action and collect results.

    For each environment in ``self.envs`` the action at the same index of
    ``self.actions`` is passed to ``env.step``. The returned reward, done
    flag(s) and info are written into ``self.buf_rews``, ``self.buf_dones``
    and ``self.buf_infos``. When any done flag of an environment is set, the
    final observation is stored under ``"terminal_observation"`` in that
    environment's info and the environment is reset; the observation returned
    by the reset is the one handed to ``self._save_obs``.

    Returns:
        A tuple ``(observations, rewards, dones, infos)``. Observations come
        from ``self._obs_from_buf()``; rewards and dones are ``np.copy``
        copies and infos is a deep copy of the internal buffers.
    """
    # Apply the stored action of each sub-environment.
    for idx, env in enumerate(self.envs):
        (
            obs,
            self.buf_rews[idx],
            self.buf_dones[idx],
            self.buf_infos[idx],
        ) = env.step(self.actions[idx])
        # np.any copes with a scalar done flag as well as with (possibly
        # nested) arrays of flags; any(x.tolist()) fails on a scalar and is
        # always truthy for non-empty nested lists.
        if np.any(self.buf_dones[idx]):
            # Keep the final observation where the user can get it, then reset.
            self.buf_infos[idx]["terminal_observation"] = obs
            obs = env.reset()
        self._save_obs(idx, obs)
    return (
        self._obs_from_buf(),
        np.copy(self.buf_rews),
        np.copy(self.buf_dones),
        copy.deepcopy(self.buf_infos),
    )
def _obs_from_buf(self) -> VecEnvObs:
    """Build the observation to return from the internal observation buffer.

    ``self.buf_obs`` is first passed through ``copy_obs_dict`` and the result
    is converted with ``dict_to_obs`` using ``self.observation_space``.
    """
    # presumably a copy so callers cannot alter the buffer - confirm in copy_obs_dict
    buffered = copy_obs_dict(self.buf_obs)
    return dict_to_obs(self.observation_space, buffered)
def reset(self) -> VecEnvObs:
    """Reset every sub-environment and return the resulting observations.

    Each environment in ``self.envs`` is reset in order and its observation
    is handed to ``self._save_obs`` together with the environment's index.

    Returns:
        The value of ``self._obs_from_buf()`` after all environments have
        been reset.
    """
    for env_idx, each_env in enumerate(self.envs):
        self._save_obs(env_idx, each_env.reset())
    # Reuse the shared helper instead of re-spelling its expression here.
    return self._obs_from_buf()