def reset_wait(self, timeout=None):
    """Collect the observations produced by a pending `reset_async` call.

    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds to wait for the sub-environments. If `None`,
        the call to `reset_wait` never times out.

    Returns
    -------
    observations : sample from `observation_space`
        A batch of observations from the vectorized environment. A deep
        copy is returned when `self.copy` is set, otherwise the internal
        buffer itself.

    Raises
    ------
    NoAsyncCallError
        If the current state is not `AsyncState.WAITING_RESET`.
    mp.TimeoutError
        If `self._poll(timeout)` reports that the wait expired.
    """
    self._assert_is_running()

    if self._state != AsyncState.WAITING_RESET:
        raise NoAsyncCallError('Calling `reset_wait` without any prior '
                               'call to `reset_async`.',
                               AsyncState.WAITING_RESET.value)

    ready = self._poll(timeout)
    if not ready:
        # Leave the waiting state before reporting the timeout.
        self._state = AsyncState.DEFAULT
        plural = 's' if timeout > 1 else ''
        raise mp.TimeoutError('The call to `reset_wait` has timed out after '
                              '{0} second{1}.'.format(timeout, plural))

    replies = [pipe.recv() for pipe in self.parent_pipes]
    results, successes = zip(*replies)
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    # Without shared memory the observations arrive through the pipes and
    # still have to be written into the batch buffer.
    if not self.shared_memory:
        concatenate(results, self.observations, self.single_observation_space)

    if self.copy:
        return deepcopy(self.observations)
    return self.observations
def reset_wait(
    self,
    timeout: Optional[Union[int, float]] = None,
    seed: Optional[int] = None,
    return_info: bool = False,
    options: Optional[dict] = None,
) -> Union[ObsType, Tuple[ObsType, List[dict]]]:
    """Block until the reset started by :meth:`reset_async` has finished.

    Args:
        timeout: Number of seconds before the call to `reset_wait` times out.
            If `None`, the call to `reset_wait` never times out.
        seed: ignored
        return_info: Whether the reset infos are returned next to the
            observations.
        options: ignored

    Returns:
        The batched observations. When ``return_info`` is set, a tuple of the
        batched observations and the infos aggregated through
        ``self._add_info``.

    Raises:
        ClosedEnvironmentError: If the environment was closed (if :meth:`close` was previously called).
        NoAsyncCallError: If :meth:`reset_wait` was called without any prior call to :meth:`reset_async`.
        TimeoutError: If :meth:`reset_wait` timed out.
    """
    self._assert_is_running()

    if self._state != AsyncState.WAITING_RESET:
        raise NoAsyncCallError(
            "Calling `reset_wait` without any prior " "call to `reset_async`.",
            AsyncState.WAITING_RESET.value,
        )

    if not self._poll(timeout):
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError(
            f"The call to `reset_wait` has timed out after {timeout} second(s)."
        )

    replies = [pipe.recv() for pipe in self.parent_pipes]
    results, successes = zip(*replies)
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    infos = {}
    if return_info:
        # Each worker answered with an (observation, info) pair.
        results, info_data = zip(*results)
        for env_index, info in enumerate(info_data):
            infos = self._add_info(infos, info, env_index)

    if not self.shared_memory:
        self.observations = concatenate(
            self.single_observation_space, results, self.observations
        )

    observations = deepcopy(self.observations) if self.copy else self.observations
    if return_info:
        return observations, infos
    return observations
def step_wait(self):
    """Step every sub-environment that is still running and batch the results.

    Sub-environments whose entry in ``self._dones`` is already set are
    skipped. ``self._actions`` is consumed in order, one action per
    sub-environment that is actually stepped.

    Returns
    -------
    observations : batched observations or None
        Observations of the sub-environments that were stepped and did not
        finish on this step, or ``None`` when there is no such environment.
    rewards : `np.ndarray` instance (dtype `np.float64`)
        One reward per consumed action.
    dones : `np.ndarray` instance
        A copy of ``self._dones`` after the step.
    infos : dict
        ``'batch_ids'`` lists the indices of the sub-environments that were
        stepped; ``'infos'`` holds the info dicts of those that did not
        finish on this step.

    Raises
    ------
    AssertionError
        If the number of consumed actions differs from ``len(self._actions)``.
    """
    observations_list, infos = [], []
    batch_ids, j = [], 0
    num_actions = len(self._actions)
    # `np.float_` was removed in NumPy 2.0; `np.float64` is the same dtype
    # and exists in every NumPy release.
    rewards = np.zeros((num_actions,), dtype=np.float64)

    for i, env in enumerate(self.envs):
        if self._dones[i]:
            continue

        action = self._actions[j]
        observation, rewards[j], self._dones[i], info = env.step(action)
        batch_ids.append(i)

        # Only environments that are still running contribute an observation.
        if not self._dones[i]:
            observations_list.append(observation)
            infos.append(info)
        j += 1
    assert num_actions == j

    if observations_list:
        observations = create_empty_array(self.single_observation_space,
                                          n=len(observations_list),
                                          fn=np.zeros)
        concatenate(observations_list,
                    observations,
                    self.single_observation_space)
    else:
        observations = None

    return (observations, rewards, np.copy(self._dones),
            {'batch_ids': batch_ids, 'infos': infos})
def step(self, actions):
    """Step every sub-environment with its action and batch the outcome.

    A sub-environment that reports `done` is reset immediately, and the
    observation of the new episode replaces the final one.

    Parameters
    ----------
    actions : iterable of samples from `action_space`
        List of actions, one per sub-environment.

    Returns
    -------
    observations : sample from `observation_space`
        A batch of observations from the vectorized environment.

    rewards : `np.ndarray` instance
        A copy of the per-environment reward buffer.

    dones : `np.ndarray` instance
        A copy of the per-environment done flags.

    infos : list of dict
        The info dicts returned by the sub-environments.
    """
    observations = []
    infos = []
    for index, (env, action) in enumerate(zip(self.envs, actions)):
        transition = env.step(action)
        observation, self._rewards[index], self._dones[index], info = transition
        if self._dones[index]:
            observation = env.reset()
        observations.append(observation)
        infos.append(info)

    concatenate(observations, self.observations, self.single_observation_space)

    batch = deepcopy(self.observations) if self.copy else self.observations
    return (batch, np.copy(self._rewards), np.copy(self._dones), infos)
def reset_wait(self):
    """Reset every sub-environment and return the batched observations.

    Clears all done flags, resets each environment in order and writes the
    resulting observations into ``self.observations``.

    Returns
    -------
    observations : sample from `observation_space`
        A deep copy of the observation buffer when ``self.copy`` is set,
        otherwise the buffer itself.
    """
    # Imported locally so the block does not depend on a module-level import.
    from copy import deepcopy

    self._dones[:] = False
    observations = [env.reset() for env in self.envs]
    concatenate(observations, self.observations, self.single_observation_space)

    # `np.copy` is only a real copy for plain arrays: a dict or tuple
    # observation buffer would be wrapped in an object array that still
    # references the shared buffers. `deepcopy` handles every buffer layout.
    return deepcopy(self.observations) if self.copy else self.observations
async def async_reset(self):
    """Reset all sub-environments concurrently and batch the observations.

    Clears the done flags, awaits ``call_wrapped_async_reset`` for every
    environment and writes the results into ``self.observations``.

    Returns
    -------
    observations : sample from `observation_space`
        A deep copy of the observation buffer when ``self.copy`` is set,
        otherwise the buffer itself.

    Raises
    ------
    ValueError
        Re-raised from ``concatenate`` after printing the shapes of the
        individual observations.
    """
    # Imported locally so the block does not depend on a module-level import.
    from copy import deepcopy

    self._dones[:] = False
    observations = await asyncio.gather(
        *[call_wrapped_async_reset(env) for env in self.envs])

    try:
        concatenate(observations, self.observations,
                    self.single_observation_space)
    except ValueError:
        # Diagnostic aid: show the per-environment shapes before failing.
        print([n.shape for n in observations])
        raise

    # `np.copy` does not produce an independent copy of dict/tuple
    # observation buffers; `deepcopy` does.
    return deepcopy(self.observations) if self.copy else self.observations
def step_wait(self):
    """Apply the stored actions to every sub-environment and batch the outcome.

    Rewards and done flags are written into ``self._rewards`` and
    ``self._dones``. A sub-environment that reports `done` is reset right
    away and contributes the observation returned by that reset.

    Returns
    -------
    observations : sample from `observation_space`
        A deep copy of the observation buffer when ``self.copy`` is set,
        otherwise the buffer itself.
    rewards : `np.ndarray` instance
        A copy of ``self._rewards``.
    dones : `np.ndarray` instance
        A copy of ``self._dones``.
    infos : list of dict
        The info dicts returned by the sub-environments.
    """
    observations = []
    infos = []
    for idx, (env, action) in enumerate(zip(self.envs, self._actions)):
        outcome = env.step(action)
        observation, self._rewards[idx], self._dones[idx], info = outcome
        if self._dones[idx]:
            observation = env.reset()
        observations.append(observation)
        infos.append(info)

    concatenate(observations, self.observations, self.single_observation_space)

    batch = deepcopy(self.observations) if self.copy else self.observations
    return (batch, np.copy(self._rewards), np.copy(self._dones), infos)
def step_wait(self, **kwargs) -> Tuple[
        Any, np.ndarray, np.ndarray, Tuple[Dict[str, Any]]]:
    """Step the single wrapped environment with ``self._action``.

    The reward and terminal flag are stored in slot 0 of ``self._reward``
    and ``self._terminal``. When the episode ends the environment is reset
    and the reset observation is used instead. ``kwargs`` are accepted but
    not used.

    Returns:
        The batched observation buffer, a copy of the reward buffer, a copy
        of the terminal buffer and a one-element tuple holding the info.
    """
    step_result = self.env.step(self._action)
    observation, self._reward[0], self._terminal[0], info = step_result

    if self._terminal[0]:
        observation = self.env.reset()

    self._observation = concatenate(
        [observation], self._observation, self.single_observation_space)

    return (
        self._observation,
        np.copy(self._reward),
        np.copy(self._terminal),
        (info,),
    )
def reset(self):
    """Reset every sub-environment and return the batched observations.

    Clears all done flags, resets each environment in order and writes the
    resulting observations into ``self.observations``.

    Returns
    -------
    observations : sample from `observation_space`
        A batch of observations from the vectorized environment. A deep
        copy is returned when ``self.copy`` is set, otherwise the internal
        buffer itself.
    """
    # Imported locally so the block does not depend on a module-level import.
    from copy import deepcopy

    self._dones[:] = False
    observations = [env.reset() for env in self.envs]
    concatenate(observations, self.observations, self.single_observation_space)

    # `np.copy` is only a real copy for plain arrays: a dict or tuple
    # observation buffer would be wrapped in an object array that still
    # references the shared buffers. `deepcopy` handles every buffer layout.
    return deepcopy(self.observations) if self.copy else self.observations
def step_wait(
    self, timeout: Optional[Union[int, float]] = None
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[dict]]:
    """Wait for the calls to :obj:`step` in each sub-environment to finish.

    Args:
        timeout: Number of seconds before the call to :meth:`step_wait` times out.
            If ``None``, the call to :meth:`step_wait` never times out.

    Returns:
        The batched environment step information, obs, reward, done and info

    Raises:
        ClosedEnvironmentError: If the environment was closed (if :meth:`close` was previously called).
        NoAsyncCallError: If :meth:`step_wait` was called without any prior call to :meth:`step_async`.
        TimeoutError: If :meth:`step_wait` timed out.
    """
    self._assert_is_running()
    if self._state != AsyncState.WAITING_STEP:
        raise NoAsyncCallError(
            "Calling `step_wait` without any prior call " "to `step_async`.",
            AsyncState.WAITING_STEP.value,
        )

    if not self._poll(timeout):
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError(
            f"The call to `step_wait` has timed out after {timeout} second(s)."
        )

    observations_list, rewards, dones, infos = [], [], [], {}
    successes = []
    for i, pipe in enumerate(self.parent_pipes):
        result, success = pipe.recv()
        successes.append(success)
        if not success:
            # A failed worker does not deliver a transition. Unpacking its
            # payload would raise here and hide the real error, so skip it
            # (every pipe is still drained) and let `_raise_if_errors`
            # report the failure below.
            continue
        obs, rew, done, info = result
        observations_list.append(obs)
        rewards.append(rew)
        dones.append(done)
        infos = self._add_info(infos, info, i)

    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    # Without shared memory the observations arrive through the pipes and
    # still have to be written into the batch buffer.
    if not self.shared_memory:
        self.observations = concatenate(
            self.single_observation_space,
            observations_list,
            self.observations,
        )

    return (
        deepcopy(self.observations) if self.copy else self.observations,
        np.array(rewards),
        np.array(dones, dtype=np.bool_),
        infos,
    )
def step_wait(self, timeout=None):
    """Collect the transitions produced by a pending `step_async` call.

    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds to wait for the sub-environments. If `None`,
        the call to `step_wait` never times out.

    Returns
    -------
    observations : sample from `observation_space`
        A batch of observations from the vectorized environment.

    rewards : `np.ndarray` instance
        A vector of rewards from the vectorized environment.

    dones : `np.ndarray` instance (dtype `np.bool_`)
        A vector whose entries indicate whether the episode has ended.

    infos : tuple of dict
        The auxiliary diagnostic information of each sub-environment.

    Raises
    ------
    NoAsyncCallError
        If the current state is not `AsyncState.WAITING_STEP`.
    mp.TimeoutError
        If `self._poll(timeout)` reports that the wait expired.
    """
    self._assert_is_running()

    if self._state != AsyncState.WAITING_STEP:
        raise NoAsyncCallError(
            'Calling `step_wait` without any prior call '
            'to `step_async`.', AsyncState.WAITING_STEP.value)

    ready = self._poll(timeout)
    if not ready:
        # Leave the waiting state before reporting the timeout.
        self._state = AsyncState.DEFAULT
        plural = 's' if timeout > 1 else ''
        raise mp.TimeoutError(
            'The call to `step_wait` has timed out after '
            '{0} second{1}.'.format(timeout, plural))

    replies = [pipe.recv() for pipe in self.parent_pipes]
    results, successes = zip(*replies)
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    observations_list, rewards, dones, infos = zip(*results)

    if not self.shared_memory:
        concatenate(observations_list, self.observations,
                    self.single_observation_space)

    batch = deepcopy(self.observations) if self.copy else self.observations
    return (batch, np.array(rewards), np.array(dones, dtype=np.bool_), infos)
async def async_step(self, actions, timeout=1.0):
    """Step all sub-environments concurrently and batch the outcome.

    Every environment is stepped through ``call_wrapped_async_step`` under
    a per-environment time limit. An environment whose step raised (a
    timeout included) is reported with reward ``0.0``, ``done=False`` and
    the info ``{"bad_transition": True}``, and its observation is obtained
    from ``call_wrapped_async_reset``.

    Args:
        actions: Iterable of actions, paired with ``self.envs`` in order.
        timeout: Seconds each sub-environment step may take before it is
            treated as failed. Defaults to ``1.0``, the previously
            hard-coded limit.

    Returns:
        A tuple ``(observations, rewards, dones, infos)``: the observation
        buffer (deep-copied when ``self.copy`` is set), copies of
        ``self._rewards`` and ``self._dones``, and the list of infos.
    """
    observations, infos = [], []

    results = await asyncio.gather(
        *[
            asyncio.wait_for(call_wrapped_async_step(env, action),
                             timeout=timeout)
            for env, action in zip(self.envs, actions)
        ],
        return_exceptions=True,
    )

    for i, result in enumerate(results):
        env = self.envs[i]
        if isinstance(result, Exception):
            print("Error:", type(result), result, env.task)
            # The reset is only scheduled here; it is awaited below together
            # with the resets of all other failed environments.
            observation, self._rewards[i], self._dones[i], info = (
                call_wrapped_async_reset(env),
                0.0,
                False,
                {"bad_transition": True},
            )
        else:
            observation, self._rewards[i], self._dones[i], info = result
        observations.append(observation)
        infos.append(info)

    # Swap every pending reset coroutine for the observation it yields.
    pending = [i for i, o in enumerate(observations) if asyncio.iscoroutine(o)]
    recovered = await asyncio.gather(*[observations[i] for i in pending])
    for i, observation in zip(pending, recovered):
        observations[i] = observation

    concatenate(observations, self.observations, self.single_observation_space)

    return (
        deepcopy(self.observations) if self.copy else self.observations,
        np.copy(self._rewards),
        np.copy(self._dones),
        infos,
    )
def concat_obs(self, observations):
    """Flatten batched observations and re-batch them into one buffer.

    Every entry of ``observations`` is split into its items with
    ``iterate``; all items are then concatenated into a fresh buffer
    created for ``self.num_envs`` entries.
    """
    space = self.observation_space

    flattened = []
    for obs in observations:
        flattened.extend(iterate(space, obs))

    target = create_empty_array(space, n=self.num_envs)
    return concatenate(space, flattened, target)
def _concatenate_typed_dicts(
    space: TypedDictSpace,
    items: Union[list, tuple],
    out: Union[tuple, dict, np.ndarray],
) -> Dict:
    """Concatenate dict-like items key by key.

    For every ``(key, subspace)`` of ``space.spaces`` the values found under
    ``key`` in each item are concatenated into ``out[key]``. The per-key
    results are passed as keyword arguments to ``space.dtype``.
    """
    merged = {}
    for key, subspace in space.spaces.items():
        column = [item[key] for item in items]
        merged[key] = concatenate(subspace, column, out=out[key])
    return space.dtype(**merged)
def reset_wait(
    self,
    seed: Optional[Union[int, List[int]]] = None,
    return_info: bool = False,
    options: Optional[dict] = None,
):
    """Waits for the calls triggered by :meth:`reset_async` to finish and returns the results.

    Args:
        seed: The reset environment seed. An ``int`` is expanded to
            ``seed + i`` for the i-th sub-environment; a list supplies one
            seed per sub-environment; ``None`` passes no seed.
        return_info: If to return information
        options: Option information for the environment reset

    Returns:
        The reset observation of the environment, and additionally the list
        of per-environment reset infos when ``return_info`` is truthy.

    Raises:
        AssertionError: If the number of seeds differs from ``self.num_envs``.
    """
    # `List[int]` (not `list[int]`) keeps the signature importable on
    # interpreters where built-in generics cannot be subscripted.
    if seed is None:
        seed = [None for _ in range(self.num_envs)]
    if isinstance(seed, int):
        seed = [seed + i for i in range(self.num_envs)]
    assert len(seed) == self.num_envs

    self._dones[:] = False
    observations = []
    data_list = []
    for env, single_seed in zip(self.envs, seed):
        kwargs = {}
        if single_seed is not None:
            kwargs["seed"] = single_seed
        if options is not None:
            kwargs["options"] = options

        # One truthiness test decides both whether `return_info` is
        # forwarded and whether a pair is unpacked, so the two can no
        # longer disagree for truthy values other than `True`.
        if return_info:
            kwargs["return_info"] = True
            observation, data = env.reset(**kwargs)
            data_list.append(data)
        else:
            observation = env.reset(**kwargs)
        observations.append(observation)

    self.observations = concatenate(
        self.single_observation_space, observations, self.observations
    )

    batch = deepcopy(self.observations) if self.copy else self.observations
    if return_info:
        return batch, data_list
    return batch
def concat_obs(self, obs_dict):
    """Batch the per-agent observations in ``possible_agents`` order.

    Raises:
        AssertionError: If an agent listed in
            ``self.par_env.possible_agents`` has no entry in ``obs_dict``.
    """
    obs_list = []
    for agent in self.par_env.possible_agents:
        if agent in obs_dict:
            obs_list.append(obs_dict[agent])
            continue
        raise AssertionError(
            "environment has agent death. Not allowed for pettingzoo_env_to_vec_env_v1 unless black_death is True"
        )

    target = create_empty_array(self.observation_space, self.num_envs)
    return concatenate(self.observation_space, obs_list, target)
def reset_wait(self, timeout=None):
    """Block until the pending :meth:`reset_async` call has completed.

    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds before the call to :meth:`reset_wait` times out.
        If ``None``, the call to :meth:`reset_wait` never times out.

    Returns
    -------
    element of :attr:`~VectorEnv.observation_space`
        A batch of observations from the vectorized environment. A deep
        copy is returned when ``self.copy`` is set, otherwise the internal
        buffer itself.

    Raises
    ------
    ClosedEnvironmentError
        If the environment was closed (if :meth:`close` was previously called).
    NoAsyncCallError
        If :meth:`reset_wait` was called without any prior call to
        :meth:`reset_async`.
    TimeoutError
        If :meth:`reset_wait` timed out.
    """
    self._assert_is_running()

    if self._state != AsyncState.WAITING_RESET:
        raise NoAsyncCallError(
            "Calling `reset_wait` without any prior " "call to `reset_async`.",
            AsyncState.WAITING_RESET.value,
        )

    ready = self._poll(timeout)
    if not ready:
        # Leave the waiting state before reporting the timeout.
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError(
            f"The call to `reset_wait` has timed out after {timeout} second{'s' if timeout > 1 else ''}."
        )

    replies = [pipe.recv() for pipe in self.parent_pipes]
    results, successes = zip(*replies)
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    if not self.shared_memory:
        self.observations = concatenate(
            results, self.observations, self.single_observation_space
        )

    if self.copy:
        return deepcopy(self.observations)
    return self.observations
def fuse_and_batch(item_space: spaces.Space,
                   *sequences: Sequence[Sequence[T]],
                   n_items: int) -> Sequence[T]:
    """Fuse several lists of (batches of) items into one batch of ``n_items``.

    Every element of every sequence is reshaped to ``[-1, *item_space.shape]``
    and stacked, the stack is split into ``n_items`` pieces, and the pieces
    are concatenated into a fresh buffer created for ``item_space``.

    Raises:
        AssertionError: If any of ``sequences`` is not a ``list``.
    """
    assert all(isinstance(sequence, list) for sequence in sequences)

    out = create_empty_array(item_space, n=n_items)

    # Stack everything along a leading batch axis.
    stacked_parts = [
        np.asarray(v).reshape([-1, *item_space.shape])
        for v in itertools.chain(*sequences)
    ]
    items_batch = np.concatenate(stacked_parts)

    # Cut the stack back into individual items of the space's shape.
    items = []
    for piece in np.split(items_batch, n_items):
        items.append(piece.reshape(item_space.shape))

    # TODO: Need to add more tests to make sure this works with custom spaces and Dict spaces.
    return concatenate(items, out, item_space)
def step_wait(self):
    """Apply the stored actions to every sub-environment and batch the outcome.

    When a step's info carries a truthy ``"TimeLimit.truncated"`` entry, the
    observation of that step is stored under ``"TimeLimit.next_obs"`` in the
    same info. A sub-environment that reports `done` is reset right away and
    contributes the observation returned by that reset.

    Returns
    -------
    tuple
        The observation buffer (deep-copied when ``self.copy`` is set),
        copies of ``self._rewards`` and ``self._dones``, and the list of
        per-environment infos.
    """
    observations = []
    infos = []
    for idx, (env, action) in enumerate(zip(self.envs, self._actions)):
        outcome = env.step(action)
        observation, self._rewards[idx], self._dones[idx], info = outcome

        truncated = info.get("TimeLimit.truncated", False)
        if truncated:
            # Keep the step's observation available in the info.
            info["TimeLimit.next_obs"] = observation

        if self._dones[idx]:
            observation = env.reset()

        observations.append(observation)
        infos.append(info)

    self.observations = concatenate(
        observations, self.observations, self.single_observation_space)

    batch = deepcopy(self.observations) if self.copy else self.observations
    return (
        batch,
        np.copy(self._rewards),
        np.copy(self._dones),
        infos,
    )
def reset_wait(
    self,
    seed: Optional[Union[int, List[int]]] = None,
    return_info: bool = False,
    options: Optional[dict] = None,
):
    """Reset every sub-environment and return the batched observations.

    Args:
        seed: An ``int`` is expanded to ``seed + i`` for the i-th
            sub-environment; a list supplies one seed per sub-environment;
            ``None`` passes no seed.
        return_info: Whether each sub-environment is asked for its reset
            info, which is then returned next to the observations.
        options: Passed to every sub-environment reset when not ``None``.

    Returns:
        The batched observations (deep-copied when ``self.copy`` is set),
        and additionally the list of per-environment reset infos when
        ``return_info`` is truthy.

    Raises:
        AssertionError: If the number of seeds differs from ``self.num_envs``.
    """
    if seed is None:
        seed = [None for _ in range(self.num_envs)]
    if isinstance(seed, int):
        seed = [seed + i for i in range(self.num_envs)]
    assert len(seed) == self.num_envs

    self._dones[:] = False
    observations = []
    data_list = []
    for env, single_seed in zip(self.envs, seed):
        kwargs = {}
        if single_seed is not None:
            kwargs["seed"] = single_seed
        if options is not None:
            kwargs["options"] = options

        # One truthiness test (instead of `== True` here and `not ...`
        # later) decides both whether `return_info` is forwarded and
        # whether a pair is unpacked, so the two can no longer disagree.
        if return_info:
            kwargs["return_info"] = True
            observation, data = env.reset(**kwargs)
            data_list.append(data)
        else:
            observation = env.reset(**kwargs)
        observations.append(observation)

    self.observations = concatenate(
        self.single_observation_space, observations, self.observations
    )

    batch = deepcopy(self.observations) if self.copy else self.observations
    if return_info:
        return batch, data_list
    return batch
def dict_vec_env_test(env):
    """Check that ``env`` behaves like a vectorized dict-observation env.

    Runs 55 steps with sampled actions and asserts on every step that the
    observations of sub-environment 1 lie in ``env.observation_space`` and
    that the done flags are either all set or all clear.
    """
    # tests that environment really is a vectorized
    # version of the environment returned by make_env
    obss = env.reset()
    for _ in range(55):
        sampled = [env.action_space.sample() for _ in range(env.num_envs)]
        actions = concatenate(
            env.action_space,
            sampled,
            create_empty_array(env.action_space, env.num_envs),
        )
        obss, rews, dones, infos = env.step(actions)

        assert obss["feature"][1][0] == 1
        second_env_obs = {
            "feature": obss["feature"][1][:],
            "id": [o[1] for o in obss["id"]],
        }
        assert second_env_obs in env.observation_space

        # no agent death, only env death
        if any(dones):
            assert all(dones)
def step_wait(self):
    """Step each sub-environment with its stored action and batch the results.

    When a sub-environment reports `done`, its final observation is saved
    under ``"terminal_observation"`` in the info, the environment is reset
    and the reset observation is batched instead.

    Returns:
        The batched environment step results: observations (deep-copied when
        ``self.copy`` is set), copies of the rewards and done flags, and the
        list of infos.
    """
    observations = []
    infos = []
    for idx, (env, action) in enumerate(zip(self.envs, self._actions)):
        outcome = env.step(action)
        observation, self._rewards[idx], self._dones[idx], info = outcome

        if self._dones[idx]:
            info["terminal_observation"] = observation
            observation = env.reset()

        observations.append(observation)
        infos.append(info)

    self.observations = concatenate(
        self.single_observation_space, observations, self.observations
    )

    batch = deepcopy(self.observations) if self.copy else self.observations
    return (
        batch,
        np.copy(self._rewards),
        np.copy(self._dones),
        infos,
    )
def reset_wait(self, **kwargs) -> Any:
    """Reset the single wrapped environment and return the batched observation.

    ``kwargs`` are accepted but not used. The observation buffer is returned
    without copying.
    """
    first_observation = self.env.reset()
    self._observation = concatenate(
        [first_observation],
        self._observation,
        self.single_observation_space,
    )
    return self._observation
def step_wait(self, timeout=None):
    """Block until the pending :meth:`step_async` call has completed.

    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds before the call to :meth:`step_wait` times out.
        If ``None``, the call to :meth:`step_wait` never times out.

    Returns
    -------
    observations : element of :attr:`~VectorEnv.observation_space`
        A batch of observations from the vectorized environment.

    rewards : :obj:`np.ndarray`
        A vector of rewards from the vectorized environment.

    dones : :obj:`np.ndarray`, dtype :obj:`np.bool_`
        A vector whose entries indicate whether the episode has ended.

    infos : tuple of dict
        The auxiliary diagnostic information dicts from sub-environments.

    Raises
    ------
    ClosedEnvironmentError
        If the environment was closed (if :meth:`close` was previously called).
    NoAsyncCallError
        If :meth:`step_wait` was called without any prior call to
        :meth:`step_async`.
    TimeoutError
        If :meth:`step_wait` timed out.
    """
    self._assert_is_running()

    if self._state != AsyncState.WAITING_STEP:
        raise NoAsyncCallError(
            "Calling `step_wait` without any prior call " "to `step_async`.",
            AsyncState.WAITING_STEP.value,
        )

    ready = self._poll(timeout)
    if not ready:
        # Leave the waiting state before reporting the timeout.
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError(
            f"The call to `step_wait` has timed out after {timeout} second(s)."
        )

    replies = [pipe.recv() for pipe in self.parent_pipes]
    results, successes = zip(*replies)
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    observations_list, rewards, dones, infos = zip(*results)

    if not self.shared_memory:
        self.observations = concatenate(
            self.single_observation_space,
            observations_list,
            self.observations,
        )

    batch = deepcopy(self.observations) if self.copy else self.observations
    return (
        batch,
        np.array(rewards),
        np.array(dones, dtype=np.bool_),
        infos,
    )
def reset_wait(
    self,
    timeout=None,
    seed: Optional[int] = None,
    return_info: bool = False,
    options: Optional[dict] = None,
):
    """Block until the pending :meth:`reset_async` call has completed.

    Parameters
    ----------
    timeout : int or float, optional
        Number of seconds before the call to `reset_wait` times out. If
        `None`, the call to `reset_wait` never times out.
    seed: ignored
    return_info : bool
        Whether the per-environment infos are returned as well.
    options: ignored

    Returns
    -------
    element of :attr:`~VectorEnv.observation_space`
        A batch of observations from the vectorized environment.
    infos : list of dicts containing metadata
        Only returned when ``return_info`` is set.

    Raises
    ------
    ClosedEnvironmentError
        If the environment was closed (if :meth:`close` was previously called).
    NoAsyncCallError
        If :meth:`reset_wait` was called without any prior call to
        :meth:`reset_async`.
    TimeoutError
        If :meth:`reset_wait` timed out.
    """
    self._assert_is_running()

    if self._state != AsyncState.WAITING_RESET:
        raise NoAsyncCallError(
            "Calling `reset_wait` without any prior " "call to `reset_async`.",
            AsyncState.WAITING_RESET.value,
        )

    if not self._poll(timeout):
        self._state = AsyncState.DEFAULT
        raise mp.TimeoutError(
            f"The call to `reset_wait` has timed out after {timeout} second(s)."
        )

    replies = [pipe.recv() for pipe in self.parent_pipes]
    results, successes = zip(*replies)
    self._raise_if_errors(successes)
    self._state = AsyncState.DEFAULT

    infos = None
    if return_info:
        # Each worker answered with an (observation, info) pair.
        results, per_env_infos = zip(*results)
        infos = list(per_env_infos)

    if not self.shared_memory:
        self.observations = concatenate(
            self.single_observation_space, results, self.observations
        )

    observations = deepcopy(self.observations) if self.copy else self.observations
    if return_info:
        return observations, infos
    return observations
def async_loop(vec_env_constr, inpt_p, pipe, shared_obs, shared_rews, shared_dones):
    """Serve instructions received over ``pipe`` for one vector environment.

    The function builds the environment with ``vec_env_constr()``, sends its
    ``num_envs`` over ``pipe`` and receives the start index of its slice in
    the shared buffers. It then processes instructions in a loop, replying
    to each one with ``comp_infos`` (an empty list unless the instruction
    sets it):

    * ``"close"`` closes the environment; the loop keeps running.
    * ``("reset", data)`` resets with ``seed=data[0]`` and
      ``options=data[2]``; when ``data[1]`` is truthy the reset infos are
      requested and returned in compressed form. Observations are written
      through ``write_observations`` and the slice's dones/rewards are
      cleared.
    * ``("step", actions)`` batches the actions, steps the environment and
      writes observations, dones and rewards for the slice; the compressed
      infos are returned.
    * ``("env_is_wrapped", data)`` returns ``vec_env.env_is_wrapped(data)``.
    * ``("render", data)`` renders; the result is only returned when
      ``data == "rgb_array"``.
    * ``"terminate"`` returns without sending a reply.

    Any exception (``BaseException``) ends the loop; the exception and its
    formatted traceback are sent over ``pipe`` as a tuple.

    Args:
        vec_env_constr: Zero-argument callable that builds the vector env.
        inpt_p: Object that is closed immediately on entry (presumably the
            other end of the pipe, unused in this process -- confirm with
            the caller).
        pipe: Connection used to receive instructions and send replies.
        shared_obs: Passed to ``write_observations`` as the destination.
        shared_rews: Object whose ``np_arr`` receives the slice's rewards.
        shared_dones: Object whose ``np_arr`` receives the slice's dones.
    """
    inpt_p.close()
    try:
        vec_env = vec_env_constr()

        # Handshake: report how many envs this worker holds, then learn
        # where its slice starts.
        pipe.send((vec_env.num_envs))
        env_start_idx = pipe.recv()
        env_end_idx = env_start_idx + vec_env.num_envs
        while True:
            instr = pipe.recv()
            # Default reply; overwritten by instructions that return data.
            comp_infos = []

            if instr == "close":
                vec_env.close()

            elif isinstance(instr, tuple):
                name, data = instr
                if name == "reset":
                    if not data[1]:
                        observations = vec_env.reset(seed=data[0], options=data[2])
                    else:
                        observations, infos = vec_env.reset(
                            seed=data[0], return_info=data[1], options=data[2])
                        comp_infos = compress_info(infos)

                    write_observations(vec_env, env_start_idx, shared_obs, observations)
                    shared_dones.np_arr[env_start_idx:env_end_idx] = False
                    shared_rews.np_arr[env_start_idx:env_end_idx] = 0.0

                elif name == "step":
                    actions = data
                    actions = concatenate(
                        vec_env.action_space,
                        actions,
                        create_empty_array(vec_env.action_space, n=len(actions)),
                    )
                    observations, rewards, dones, infos = vec_env.step(actions)
                    write_observations(vec_env, env_start_idx, shared_obs, observations)
                    shared_dones.np_arr[env_start_idx:env_end_idx] = dones
                    shared_rews.np_arr[env_start_idx:env_end_idx] = rewards
                    comp_infos = compress_info(infos)

                elif name == "env_is_wrapped":
                    comp_infos = vec_env.env_is_wrapped(data)

                elif name == "render":
                    render_result = vec_env.render(data)
                    # Other render modes reply with the empty default.
                    if data == "rgb_array":
                        comp_infos = render_result

                else:
                    raise AssertionError("bad tuple instruction name: " + name)

            elif instr == "terminate":
                # Leave without replying.
                return

            else:
                raise AssertionError("bad instruction: " + instr)

            pipe.send(comp_infos)
    except BaseException as e:
        # Forward the failure, with its traceback text, over the pipe.
        tb = traceback.format_exc()
        pipe.send((e, tb))
def concatenate_actions(self, actions, n_actions):
    """Batch ``actions`` into a fresh buffer sized for ``n_actions`` entries."""
    target = create_empty_array(self.action_space, n=n_actions)
    return concatenate(self.action_space, actions, target)