def step_async(self, actions):
    """Dispatch a ``step`` command with one action to every sub-environment.

    Parameters
    ----------
    actions : element of :attr:`~VectorEnv.action_space`
        Batch of actions.

    Raises
    ------
    ClosedEnvironmentError
        If the environment was closed (if :meth:`close` was previously called).
    AlreadyPendingCallError
        If the environment is already waiting for a pending call to another
        method (e.g. :meth:`reset_async`). This can be caused by two
        consecutive calls to :meth:`step_async`, with no call to
        :meth:`step_wait` in between.
    """
    self._assert_is_running()
    # Refuse to start a new step while another async call is still pending.
    if self._state != AsyncState.DEFAULT:
        raise AlreadyPendingCallError(
            f"Calling `step_async` while waiting for a pending call to `{self._state.value}` to complete.",
            self._state.value,
        )
    per_env_actions = iterate(self.action_space, actions)
    for worker_pipe, act in zip(self.parent_pipes, per_env_actions):
        worker_pipe.send(("step", act))
    self._state = AsyncState.WAITING_STEP
def write_observations(vec_env, env_start_idx, shared_obs, obs):
    """Copy a batch of observations into shared memory.

    The batched ``obs`` is split into per-environment items and item ``i`` is
    written to slot ``env_start_idx + i`` of ``shared_obs``.
    """
    items = list(iterate(vec_env.observation_space, obs))
    for offset in range(vec_env.num_envs):
        write_to_shared_memory(
            vec_env.observation_space,
            env_start_idx + offset,
            items[offset],
            shared_obs,
        )
def concat_obs(self, observations):
    """Flatten per-sub-env observation batches and concatenate them into a
    single batch covering all ``num_envs`` environments."""
    flattened = []
    for batch in observations:
        flattened.extend(iterate(self.observation_space, batch))
    return concatenate(
        self.observation_space,
        flattened,
        create_empty_array(self.observation_space, n=self.num_envs),
    )
def step(self, actions):
    """Step every sub-vector-environment with its slice of the action batch.

    Parameters
    ----------
    actions : element of :attr:`action_space`
        Batch of actions for all environments; split across ``self.vec_envs``
        in order, ``venv.num_envs`` actions per sub-vector-env.

    Returns
    -------
    tuple
        ``(observations, rewards, dones, infos)`` — observations concatenated
        over all environments, ``rewards``/``dones`` as 1-D numpy arrays, and
        ``infos`` as a flat list with one entry per environment.
    """
    data = []
    idx = 0
    actions = list(iterate(self.action_space, actions))
    for venv in self.vec_envs:
        data.append(
            venv.step(
                self.concatenate_actions(
                    actions[idx : idx + venv.num_envs], venv.num_envs
                )
            )
        )
        idx += venv.num_envs
    observations, rewards, dones, infos = transpose(data)
    observations = self.concat_obs(observations)
    rewards = np.concatenate(rewards, axis=0)
    dones = np.concatenate(dones, axis=0)
    # Flatten the per-venv info lists. A comprehension produces the same flat
    # list as the original `sum(infos, [])` without its quadratic copying.
    infos = [info for venv_infos in infos for info in venv_infos]
    return observations, rewards, dones, infos
def step(self, actions):
    """Translate a batched vector-env step into a ParallelEnv step.

    The action batch is mapped back onto the currently-active agents (in
    ``possible_agents`` order), the underlying parallel environment is
    stepped, and its per-agent dicts are densified into arrays/lists ordered
    by ``possible_agents``. When every agent is done, each agent's terminal
    observation is stashed in ``infos[agent]["terminal_observation"]`` and
    the environment is reset before returning.
    """
    action_list = list(iterate(self.action_space, actions))
    active = set(self.par_env.agents)
    act_dict = {
        agent: action_list[i]
        for i, agent in enumerate(self.par_env.possible_agents)
        if agent in active
    }
    observations, rewards, dones, infos = self.par_env.step(act_dict)

    # adds last observation to info where user can get it
    episode_over = all(dones.values())
    if episode_over:
        for agent, obs in observations.items():
            infos[agent]["terminal_observation"] = obs

    reward_vec = np.array(
        [rewards.get(agent, 0) for agent in self.par_env.possible_agents],
        dtype=np.float32,
    )
    done_vec = np.array(
        [dones.get(agent, False) for agent in self.par_env.possible_agents],
        dtype=np.uint8,
    )
    info_list = [infos.get(agent, {}) for agent in self.par_env.possible_agents]

    if episode_over:
        observations = self.reset()
    else:
        observations = self.concat_obs(observations)
    assert (
        self.black_death or self.par_env.agents == self.par_env.possible_agents
    ), "MarkovVectorEnv does not support environments with varying numbers of active agents unless black_death is set to True"
    return observations, reward_vec, done_vec, info_list
def step_async(self, actions):
    """Convert ``actions`` into an iterable of per-env actions via
    :func:`iterate` and store it in :attr:`_actions` (consumed by
    :meth:`step_wait`)."""
    self._actions = iterate(self.action_space, actions)
def step_async(self, actions):
    """Set :attr:`_actions` for use by :meth:`step_wait` by converting the
    batched ``actions`` to an iterable of per-env actions (via
    :func:`iterate` over :attr:`action_space`)."""
    self._actions = iterate(self.action_space, actions)
def step_async(self, actions):
    """Split the action batch at the ``idx_starts`` boundaries and send each
    slice to its worker pipe as a ``("step", ...)`` command."""
    action_list = list(iterate(self.action_space, actions))
    for worker_idx, worker_pipe in enumerate(self.pipes):
        lo = self.idx_starts[worker_idx]
        hi = self.idx_starts[worker_idx + 1]
        worker_pipe.send(("step", action_list[lo:hi]))