def step_wait(self):
    """Finish the wrapped venv's step and annotate the fixed agent's infos.

    Delegates to ``self.venv.step_wait()``, caches the dones in
    ``self._dones``, and splits the observations with ``_tuple_pop`` so
    that the second element of the result is cached in ``self._obs``.
    For every environment index below ``self.num_envs``, the values in
    ``self._data`` at that index are merged into
    ``infos[env_idx][self._agent_to_fix]``.

    Returns:
        ``(observations, rewards, dones, infos)`` where ``observations``
        is the first element returned by ``_tuple_pop`` and ``rewards``
        is passed through unchanged from the wrapped venv.
    """
    step_result = self.venv.step_wait()
    all_obs, rewards, self._dones, infos = step_result

    # NOTE(review): _tuple_pop presumably returns (tuple without the entry
    # at the given index, the removed entry) -- confirm against its
    # definition.
    remaining_obs, self._obs = _tuple_pop(all_obs, self._agent_to_fix)

    for env_idx in range(self.num_envs):
        # Pick out this environment's slice of every value in self._data.
        env_data = {}
        for key, per_env_values in self._data.items():
            env_data[key] = per_env_values[env_idx]
        infos[env_idx][self._agent_to_fix].update(env_data)

    return remaining_obs, rewards, self._dones, infos
def reset(self):
    """Reset the wrapped venv and split off the fixed agent's observation.

    Calls ``self.venv.reset()`` and passes the result through
    ``_tuple_pop`` with ``self._agent_to_fix``. The second element of
    that result is cached in ``self._obs``.

    Returns:
        The first element returned by ``_tuple_pop``.
    """
    all_obs = self.venv.reset()

    # NOTE(review): _tuple_pop presumably returns (tuple without the entry
    # at the given index, the removed entry) -- confirm against its
    # definition.
    remaining_obs, fixed_agent_obs = _tuple_pop(all_obs, self._agent_to_fix)
    self._obs = fixed_agent_obs
    return remaining_obs
def step_wait(self):
    """Finish the wrapped venv's step, splitting off the fixed agent's data.

    Delegates to ``self.venv.step_wait()`` and caches the dones in
    ``self._dones``. Observations and rewards are each passed through
    ``_tuple_pop`` with ``self._agent_to_fix``: the second element of the
    observation result is cached in ``self._obs``, while the second
    element of the reward result is discarded.

    Returns:
        ``(observations, rewards, dones, infos)`` where ``observations``
        and ``rewards`` are the first elements of their respective
        ``_tuple_pop`` results and ``infos`` is passed through unchanged.
    """
    step_result = self.venv.step_wait()
    all_obs, all_rewards, self._dones, infos = step_result

    # NOTE(review): _tuple_pop presumably returns (tuple without the entry
    # at the given index, the removed entry) -- confirm against its
    # definition.
    remaining_obs, self._obs = _tuple_pop(all_obs, self._agent_to_fix)

    # The second element of the reward result is intentionally not kept.
    remaining_rewards, _unused = _tuple_pop(all_rewards, self._agent_to_fix)

    return remaining_obs, remaining_rewards, self._dones, infos