def step_wait(self):
     """Finish the pending step on the wrapped env and hide the fixed agent.

     The step result of ``self.venv`` is unpacked, the done flags are cached
     on ``self._dones`` and the entry at ``self._agent_to_fix`` is split out
     of the observations into ``self._obs``. For every environment index the
     matching slice of each value in ``self._data`` is merged into that
     environment's info entry for the fixed agent.

     Returns:
         ``(observations, rewards, dones, infos)`` where ``observations`` is
         the first element returned by ``_tuple_pop``; ``rewards`` is passed
         through exactly as the wrapped env produced it.
     """
     all_obs, rewards, self._dones, infos = self.venv.step_wait()
     fixed_agent = self._agent_to_fix
     # NOTE(review): _tuple_pop presumably returns (remaining, removed) --
     # confirm against the helper's definition.
     remaining_obs, self._obs = _tuple_pop(all_obs, fixed_agent)

     for idx in range(self.num_envs):
         # Pick out this environment's slice of every recorded value.
         per_env = {}
         for key, batched in self._data.items():
             per_env[key] = batched[idx]
         infos[idx][fixed_agent].update(per_env)

     return remaining_obs, rewards, self._dones, infos
 def reset(self):
     """Reset the wrapped env and split off the fixed agent's observation.

     The entry at ``self._agent_to_fix`` is stored on ``self._obs``; the
     other element returned by ``_tuple_pop`` is handed back to the caller.
     """
     initial_obs = self.venv.reset()
     # NOTE(review): _tuple_pop presumably returns (remaining, removed) --
     # confirm against the helper's definition.
     remaining_obs, self._obs = _tuple_pop(initial_obs, self._agent_to_fix)
     return remaining_obs
 def step_wait(self):
     """Finish the pending step on the wrapped env and hide the fixed agent.

     The done flags are cached on ``self._dones``. The entry at
     ``self._agent_to_fix`` is split out of both the observations and the
     rewards: the observation part is kept on ``self._obs`` while the
     reward part is discarded.

     Returns:
         ``(observations, rewards, dones, infos)`` with ``observations`` and
         ``rewards`` being the first elements returned by ``_tuple_pop``.
     """
     all_obs, all_rewards, self._dones, infos = self.venv.step_wait()
     fixed_agent = self._agent_to_fix
     # NOTE(review): _tuple_pop presumably returns (remaining, removed) --
     # confirm against the helper's definition.
     remaining_obs, self._obs = _tuple_pop(all_obs, fixed_agent)
     # The fixed agent's own reward is not needed by callers of this wrapper.
     remaining_rewards, _unused = _tuple_pop(all_rewards, fixed_agent)
     return remaining_obs, remaining_rewards, self._dones, infos