def __init__(self, env_fns, observation_space=None, action_space=None,
             copy=True):
    """Create every sub-environment and allocate the per-batch buffers.

    Args:
        env_fns: Iterable of zero-argument callables; each call returns one
            environment. It is iterated exactly once to build ``self.envs``.
        observation_space: Observation space of a single environment. If
            either space argument is ``None``, missing values are taken
            from the first created environment.
        action_space: Action space of a single environment (same fallback
            as ``observation_space``).
        copy: Stored on ``self.copy``; not otherwise used in this method.
    """
    self.env_fns = env_fns
    self.envs = [env_fn() for env_fn in env_fns]
    self.copy = copy

    if (observation_space is None) or (action_space is None):
        first_env = self.envs[0]
        observation_space = observation_space or first_env.observation_space
        action_space = action_space or first_env.action_space

    # Count the environments that were actually built instead of calling
    # len() on ``env_fns``: the latter raises TypeError for iterables that
    # have no __len__ (e.g. generators) even though iteration succeeded.
    super(SyncVectorEnv, self).__init__(
        num_envs=len(self.envs),
        observation_space=observation_space,
        action_space=action_space,
    )

    self._check_observation_spaces()
    self.observations = create_empty_array(self.single_observation_space,
                                           n=self.num_envs,
                                           fn=np.zeros)
    self._rewards = np.zeros((self.num_envs, ), dtype=np.float64)
    self._dones = np.zeros((self.num_envs, ), dtype=np.bool_)
    self._actions = None
def __init__(self, num_envs, observation_space, action_space, copy=True):
    """Initialise the vector env from an explicit size and explicit spaces.

    No sub-environments are created here; the caller supplies the number
    of environments and both single-environment spaces directly.

    Args:
        num_envs: Number of environments, forwarded to the base class.
        observation_space: Observation space of a single environment.
        action_space: Action space of a single environment.
        copy: Stored on ``self.copy``; not otherwise used in this method.
    """
    self.copy = copy

    super(SyncVectorEnv_, self).__init__(
        num_envs=num_envs,
        observation_space=observation_space,
        action_space=action_space,
    )

    # Zero-filled observation buffer with one slot per environment.
    self.observations = create_empty_array(
        self.single_observation_space,
        n=self.num_envs,
        fn=np.zeros,
    )

    # Per-environment reward and done buffers.
    self._rewards = np.zeros((self.num_envs, ), dtype=np.float64)
    self._dones = np.zeros((self.num_envs, ), dtype=np.bool_)

    # No actions are pending until they are set elsewhere.
    self._actions = None
def step_wait(self):
    """Step every environment that is not yet done with the pending actions.

    ``self._actions`` holds one action per *active* (not done) environment,
    in environment order. Environments already flagged in ``self._dones``
    are skipped and consume no action.

    Returns:
        A 4-tuple ``(observations, rewards, dones, info)`` where

        * ``observations`` batches the observations of the environments
          that are still running after this step, or is ``None`` when no
          environment is still running;
        * ``rewards`` is a float64 array with one entry per action;
        * ``dones`` is a copy of ``self._dones`` (one flag per environment);
        * ``info`` is ``{'batch_ids': ..., 'infos': ...}``: the indices of
          the environments stepped, and the info objects of those that are
          still running after the step.

    Raises:
        AssertionError: If the number of actions differs from the number
            of environments that were stepped.
    """
    observations_list, infos = [], []
    batch_ids, j = [], 0
    num_actions = len(self._actions)
    # ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the same
    # dtype and exists on every NumPy version.
    rewards = np.zeros((num_actions, ), dtype=np.float64)

    for i, env in enumerate(self.envs):
        if self._dones[i]:
            continue

        action = self._actions[j]
        observation, rewards[j], self._dones[i], info = env.step(action)
        batch_ids.append(i)

        # Only environments that are still running contribute an
        # observation (and its info) to the returned batch.
        if not self._dones[i]:
            observations_list.append(observation)
            infos.append(info)
        j += 1
    assert num_actions == j

    if observations_list:
        observations = create_empty_array(self.single_observation_space,
                                          n=len(observations_list),
                                          fn=np.zeros)
        concatenate(observations_list,
                    observations,
                    self.single_observation_space)
    else:
        observations = None

    return (observations, rewards, np.copy(self._dones), {
        'batch_ids': batch_ids,
        'infos': infos
    })
def __init__(self, envs, observation_space=None, action_space=None,
             copy=True):
    """Wrap already-constructed environments and grab the asyncio loop.

    Args:
        envs: Sequence of environment instances (indexed and measured with
            ``len``).
        observation_space: Observation space of a single environment. If
            either space argument is ``None``, missing values are taken
            from ``envs[0]``.
        action_space: Action space of a single environment (same fallback).
        copy: Stored on ``self.copy``; not otherwise used in this method.
    """
    self.envs = envs
    self.copy = copy

    needs_fallback = observation_space is None or action_space is None
    if needs_fallback:
        first_env = self.envs[0]
        observation_space = observation_space or first_env.observation_space
        action_space = action_space or first_env.action_space

    super(AsyncioVectorEnv, self).__init__(
        num_envs=len(self.envs),
        observation_space=observation_space,
        action_space=action_space,
    )
    self._check_observation_spaces()

    # Zero-filled batch buffers, one slot per environment.
    self.observations = create_empty_array(
        self.single_observation_space,
        n=self.num_envs,
        fn=np.zeros,
    )
    self._rewards = np.zeros((self.num_envs, ), dtype=np.float64)
    self._dones = np.zeros((self.num_envs, ), dtype=np.bool_)
    self._actions = None

    self.closed = False
    self.loop = asyncio.get_event_loop()
def __init__(self, env_fns, observation_space=None, action_space=None,
             shared_memory=True, copy=True, context=None):
    """Start one daemon worker process per environment factory.

    Args:
        env_fns: Indexable collection of zero-argument callables, each
            creating one environment inside its worker process.
        observation_space: Observation space of a single environment. If
            either space argument is ``None``, a temporary environment is
            built from ``env_fns[0]`` to supply the missing values and is
            closed again.
        action_space: Action space of a single environment (same fallback).
        shared_memory: If true, observations live in a shared-memory
            buffer that is handed to the workers; otherwise a plain
            zero-filled array is allocated and no buffer is passed.
        copy: Stored on ``self.copy``; not otherwise used in this method.
        context: Passed to ``mp.get_context``.
    """
    try:
        ctx = mp.get_context(context)
    except AttributeError:
        # ``get_context`` is missing: fall back to the module itself.
        logger.warn('Context switching for `multiprocessing` is not '
                    'available in Python 2. Using the default context.')
        ctx = mp

    self.env_fns = env_fns
    self.shared_memory = shared_memory
    self.copy = copy

    if observation_space is None or action_space is None:
        probe_env = env_fns[0]()
        observation_space = observation_space or probe_env.observation_space
        action_space = action_space or probe_env.action_space
        probe_env.close()
        del probe_env

    super(AsyncVectorEnv, self).__init__(
        num_envs=len(env_fns),
        observation_space=observation_space,
        action_space=action_space,
    )

    if not self.shared_memory:
        obs_buffer = None
        self.observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros)
    else:
        obs_buffer = create_shared_memory(self.single_observation_space,
                                          n=self.num_envs)
        self.observations = read_from_shared_memory(
            obs_buffer, self.single_observation_space, n=self.num_envs)

    self.parent_pipes = []
    self.processes = []
    self.error_queue = ctx.Queue()

    if self.shared_memory:
        target = _worker_shared_memory
    else:
        target = _worker

    with clear_mpi_env_vars():
        for idx, env_fn in enumerate(self.env_fns):
            parent_pipe, child_pipe = ctx.Pipe()
            worker_name = 'Worker<{0}>-{1}'.format(type(self).__name__, idx)
            worker_args = (idx, CloudpickleWrapper(env_fn), child_pipe,
                           parent_pipe, obs_buffer, self.error_queue)
            process = ctx.Process(target=target,
                                  name=worker_name,
                                  args=worker_args)
            process.daemon = True

            self.parent_pipes.append(parent_pipe)
            self.processes.append(process)

            process.start()
            # The child end belongs to the worker; close it in this process.
            child_pipe.close()

    self._state = AsyncState.DEFAULT
    self._check_observation_spaces()
def __init__(self, env: Env) -> None:
    """Wrap one non-vectorized environment as a vector env of size one.

    Args:
        env: The environment to wrap. It must not already be a
            ``VectorEnv``.

    Raises:
        AssertionError: If ``env`` is a ``VectorEnv`` instance.
    """
    assert not isinstance(env, VectorEnv)

    super().__init__(1, env.observation_space, env.action_space)
    # Replace the action space with its batched (size 1) counterpart.
    self.action_space = batch_space(env.action_space, 1)
    self.env = env

    # Single-slot, zero-filled buffers for the wrapped environment.
    self._observation = create_empty_array(
        env.observation_space,
        n=1,
        fn=np.zeros,
    )
    self._reward = np.zeros((1, ), dtype=np.float64)
    self._terminal = np.zeros((1, ), dtype=np.bool_)
    self._action = None
def concat_obs(self, observations):
    """Merge several batched observations into one batch.

    Each element of ``observations`` is split into its individual items
    with ``iterate``; all items are then concatenated into a freshly
    created array with ``self.num_envs`` slots.

    Args:
        observations: Iterable of batched observations.

    Returns:
        The result of ``concatenate`` over all individual items.
    """
    single_items = []
    for batched_obs in observations:
        for item in iterate(self.observation_space, batched_obs):
            single_items.append(item)

    out = create_empty_array(self.observation_space, n=self.num_envs)
    return concatenate(self.observation_space, single_items, out)
def __init__(self, env_fns, observation_space=None, action_space=None,
             shared_memory=True, copy=True, context=None, daemon=True,
             worker=None):
    """Start one worker process per environment factory.

    Args:
        env_fns: Indexable collection of zero-argument callables, each
            creating one environment inside its worker process.
        observation_space: Observation space of a single environment. If
            either space argument is ``None``, a temporary environment is
            built from ``env_fns[0]`` to supply the missing values and is
            closed again.
        action_space: Action space of a single environment (same fallback).
        shared_memory: If true, observations live in a shared-memory
            buffer that is handed to the workers; otherwise a plain
            zero-filled array is allocated and no buffer is passed.
        copy: Stored on ``self.copy``; not otherwise used in this method.
        context: Passed to ``mp.get_context``.
        daemon: Value assigned to each process's ``daemon`` flag.
        worker: Optional process target used instead of the default
            worker function when truthy.

    Raises:
        ValueError: If ``shared_memory`` is true and building the shared
            buffer raises ``CustomSpaceError``.
    """
    ctx = mp.get_context(context)

    self.env_fns = env_fns
    self.shared_memory = shared_memory
    self.copy = copy

    if observation_space is None or action_space is None:
        probe_env = env_fns[0]()
        observation_space = observation_space or probe_env.observation_space
        action_space = action_space or probe_env.action_space
        probe_env.close()
        del probe_env

    super(AsyncVectorEnv, self).__init__(
        num_envs=len(env_fns),
        observation_space=observation_space,
        action_space=action_space,
    )

    if not self.shared_memory:
        obs_buffer = None
        self.observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros)
    else:
        try:
            obs_buffer = create_shared_memory(
                self.single_observation_space, n=self.num_envs, ctx=ctx)
            self.observations = read_from_shared_memory(
                obs_buffer, self.single_observation_space, n=self.num_envs)
        except CustomSpaceError:
            raise ValueError('Using `shared_memory=True` in `AsyncVectorEnv` '
                             'is incompatible with non-standard Gym observation spaces '
                             '(i.e. custom spaces inheriting from `gym.Space`), and is '
                             'only compatible with default Gym spaces (e.g. `Box`, '
                             '`Tuple`, `Dict`) for batching. Set `shared_memory=False` '
                             'if you use custom observation spaces.')

    self.parent_pipes = []
    self.processes = []
    self.error_queue = ctx.Queue()

    if self.shared_memory:
        target = _worker_shared_memory
    else:
        target = _worker
    # A caller-supplied worker takes precedence over the default.
    if worker:
        target = worker

    with clear_mpi_env_vars():
        for idx, env_fn in enumerate(self.env_fns):
            parent_pipe, child_pipe = ctx.Pipe()
            worker_name = 'Worker<{0}>-{1}'.format(type(self).__name__, idx)
            worker_args = (idx, CloudpickleWrapper(env_fn), child_pipe,
                           parent_pipe, obs_buffer, self.error_queue)
            process = ctx.Process(target=target,
                                  name=worker_name,
                                  args=worker_args)
            process.daemon = daemon

            self.parent_pipes.append(parent_pipe)
            self.processes.append(process)

            process.start()
            # The child end belongs to the worker; close it in this process.
            child_pipe.close()

    self._state = AsyncState.DEFAULT
    self._check_observation_spaces()
def concat_obs(self, obs_dict):
    """Batch per-agent observations in ``possible_agents`` order.

    Args:
        obs_dict: Mapping from agent to that agent's observation. Every
            agent in ``self.par_env.possible_agents`` must be present.

    Returns:
        The result of ``concatenate`` over the ordered observations, using
        a freshly created array with ``self.num_envs`` slots.

    Raises:
        AssertionError: If any possible agent has no entry in ``obs_dict``.
    """
    agents = self.par_env.possible_agents
    if any(agent not in obs_dict for agent in agents):
        raise AssertionError(
            "environment has agent death. Not allowed for pettingzoo_env_to_vec_env_v1 unless black_death is True"
        )

    ordered_obs = [obs_dict[agent] for agent in agents]
    out = create_empty_array(self.observation_space, self.num_envs)
    return concatenate(self.observation_space, ordered_obs, out)
def fuse_and_batch(item_space: spaces.Space,
                   *sequences: Sequence[Sequence[T]],
                   n_items: int) -> Sequence[T]:
    """Fuse several lists of batches into one batch of ``n_items`` items.

    Every value found in any of the ``sequences`` is converted to an array
    and reshaped to ``(-1, *item_space.shape)``; the pieces are joined
    along the first axis, split into ``n_items`` equal parts, and each part
    is reshaped to ``item_space.shape`` before being written into a new
    array created for ``item_space``.

    Args:
        item_space: Space describing a single item.
        *sequences: Lists whose elements are array-like batches of items.
        n_items: Total number of items across all sequences.

    Returns:
        The result of ``concatenate`` over the individual items.

    Raises:
        AssertionError: If any of ``sequences`` is not a ``list``.
    """
    assert all(isinstance(sequence, list) for sequence in sequences)
    out = create_empty_array(item_space, n=n_items)

    # Stack every batch from every sequence into one long batch.
    batched_shape = [-1, *item_space.shape]
    pieces = []
    for value in itertools.chain.from_iterable(sequences):
        pieces.append(np.asarray(value).reshape(batched_shape))
    items_batch = np.concatenate(pieces)

    # Cut the long batch back into single items of the space's shape.
    items = []
    for chunk in np.split(items_batch, n_items):
        items.append(chunk.reshape(item_space.shape))

    # TODO: Need to add more tests to make sure this works with custom
    # spaces and Dict spaces.
    return concatenate(items, out, item_space)
def dict_vec_env_test(env):
    """Exercise a dict-observation vector env for 55 random steps.

    After every step this checks that

    * ``obss["feature"][1][0]`` equals 1,
    * the observation rebuilt from index 1 of each entry is contained in
      ``env.observation_space``,
    * either no done flag is set or all of them are (no agent death, only
      env death).

    Args:
        env: Vector environment exposing ``reset``, ``step``,
            ``action_space``, ``observation_space`` and ``num_envs``.
    """
    env.reset()

    for _ in range(55):
        sampled = [env.action_space.sample() for _ in range(env.num_envs)]
        batched_actions = concatenate(
            env.action_space,
            sampled,
            create_empty_array(env.action_space, env.num_envs),
        )
        obss, _rews, dones, _infos = env.step(batched_actions)

        assert obss["feature"][1][0] == 1

        rebuilt = {
            "feature": obss["feature"][1][:],
            "id": [o[1] for o in obss["id"]],
        }
        assert rebuilt in env.observation_space

        # no agent death, only env death
        if any(dones):
            assert all(dones)
def __init__(
    self,
    env_fns: Iterator[callable],
    observation_space: Space = None,
    action_space: Space = None,
    copy: bool = True,
):
    """Create a vector env that runs its sub-environments one after another.

    Args:
        env_fns: Iterable of callables; each call creates one environment.
        observation_space: Observation space of a single environment.
            When ``None``, the first environment's space is used.
        action_space: Action space of a single environment. When ``None``,
            the first environment's space is used.
        copy: When ``True``, :meth:`reset` and :meth:`step` hand back a
            copy of the observations.

    Raises:
        RuntimeError: If some sub-environment's observation space differs
            from ``observation_space`` (or, by default, from the first
            sub-environment's observation space).
    """
    self.env_fns = env_fns
    self.envs = [make_env() for make_env in env_fns]
    self.copy = copy
    self.metadata = self.envs[0].metadata

    if observation_space is None or action_space is None:
        first_env = self.envs[0]
        observation_space = observation_space or first_env.observation_space
        action_space = action_space or first_env.action_space

    super().__init__(
        num_envs=len(self.envs),
        observation_space=observation_space,
        action_space=action_space,
    )
    self._check_spaces()

    # Zero-filled batch buffers, one slot per environment.
    self.observations = create_empty_array(
        self.single_observation_space,
        n=self.num_envs,
        fn=np.zeros,
    )
    self._rewards = np.zeros((self.num_envs, ), dtype=np.float64)
    self._dones = np.zeros((self.num_envs, ), dtype=np.bool_)
    self._actions = None
def __init__(
    self,
    env_fns: Sequence[callable],
    observation_space: Optional[gym.Space] = None,
    action_space: Optional[gym.Space] = None,
    shared_memory: bool = True,
    copy: bool = True,
    context: Optional[str] = None,
    daemon: bool = True,
    worker: Optional[callable] = None,
):
    """Create a vector env that runs its sub-environments in subprocesses.

    Args:
        env_fns: Functions that create the environments.
        observation_space: Observation space of a single environment.
            When ``None``, the first environment's space is used.
        action_space: Action space of a single environment. When ``None``,
            the first environment's space is used.
        shared_memory: When ``True``, worker processes report observations
            through shared variables, which can be more efficient for
            large observations (e.g. images).
        copy: When ``True``, :meth:`~AsyncVectorEnv.reset` and
            :meth:`~AsyncVectorEnv.step` hand back a copy of the
            observations.
        context: Context for `multiprocessing`_. ``None`` selects the
            default context.
        daemon: When ``True``, subprocesses run with the ``daemon`` flag
            on, so they quit when the head process quits. Daemonic
            processes cannot spawn children, so some environments may
            need ``False``.
        worker: Optional replacement for the default subprocess worker,
            e.g. to override how resets on done are handled.

    Warnings:
        ``worker`` is an advanced option: it is very flexible and equally
        easy to get wrong. When writing a custom worker, start from the
        code of ``_worker`` (or ``_worker_shared_memory``) and adapt it.

    Raises:
        RuntimeError: If some sub-environment's observation space differs
            from ``observation_space`` (or, by default, from the first
            sub-environment's observation space).
        ValueError: If ``observation_space`` is a custom space (i.e. not a
            default Gym space such as gym.spaces.Box, gym.spaces.Discrete
            or gym.spaces.Dict) and ``shared_memory`` is ``True``.
    """
    ctx = mp.get_context(context)

    self.env_fns = env_fns
    self.shared_memory = shared_memory
    self.copy = copy

    # A throw-away environment supplies the metadata and, if needed, the
    # default spaces; it is closed before any worker is started.
    probe_env = env_fns[0]()
    self.metadata = probe_env.metadata
    if observation_space is None or action_space is None:
        observation_space = observation_space or probe_env.observation_space
        action_space = action_space or probe_env.action_space
    probe_env.close()
    del probe_env

    super().__init__(
        num_envs=len(env_fns),
        observation_space=observation_space,
        action_space=action_space,
    )

    if not self.shared_memory:
        obs_buffer = None
        self.observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros)
    else:
        try:
            obs_buffer = create_shared_memory(
                self.single_observation_space, n=self.num_envs, ctx=ctx)
            self.observations = read_from_shared_memory(
                self.single_observation_space, obs_buffer, n=self.num_envs)
        except CustomSpaceError:
            raise ValueError(
                "Using `shared_memory=True` in `AsyncVectorEnv` "
                "is incompatible with non-standard Gym observation spaces "
                "(i.e. custom spaces inheriting from `gym.Space`), and is "
                "only compatible with default Gym spaces (e.g. `Box`, "
                "`Tuple`, `Dict`) for batching. Set `shared_memory=False` "
                "if you use custom observation spaces.")

    self.parent_pipes = []
    self.processes = []
    self.error_queue = ctx.Queue()

    if self.shared_memory:
        target = _worker_shared_memory
    else:
        target = _worker
    # A caller-supplied worker takes precedence over the default.
    if worker:
        target = worker

    with clear_mpi_env_vars():
        for idx, env_fn in enumerate(self.env_fns):
            parent_pipe, child_pipe = ctx.Pipe()
            worker_args = (
                idx,
                CloudpickleWrapper(env_fn),
                child_pipe,
                parent_pipe,
                obs_buffer,
                self.error_queue,
            )
            process = ctx.Process(
                target=target,
                name=f"Worker<{type(self).__name__}>-{idx}",
                args=worker_args,
            )
            process.daemon = daemon

            self.parent_pipes.append(parent_pipe)
            self.processes.append(process)

            process.start()
            # The child end belongs to the worker; close it in this process.
            child_pipe.close()

    self._state = AsyncState.DEFAULT
    self._check_spaces()
def concatenate_actions(self, actions, n_actions):
    """Batch ``actions`` into a new array created for the action space.

    Args:
        actions: Individual actions to batch.
        n_actions: Number of slots to allocate in the output array.

    Returns:
        The result of ``concatenate`` for ``self.action_space``.
    """
    out = create_empty_array(self.action_space, n=n_actions)
    batched = concatenate(self.action_space, actions, out)
    return batched
def async_loop(vec_env_constr, inpt_p, pipe, shared_obs, shared_rews,
               shared_dones):
    """Serve instructions for one vector env over ``pipe`` until terminated.

    Protocol, as implemented below:

    1. ``inpt_p`` is closed, the vector env is built, and its ``num_envs``
       is sent over ``pipe``; the reply is this env's start index.
    2. Each received instruction is handled and answered with one
       ``pipe.send(comp_infos)`` (an empty list unless the instruction
       produces a result):

       * ``"close"`` closes the vector env (the loop keeps running);
       * ``("reset", (seed, return_info, options))`` resets the env, writes
         the observations, and clears this env's slice of the shared
         dones/rewards;
       * ``("step", actions)`` batches the actions, steps the env and
         writes observations, dones and rewards;
       * ``("env_is_wrapped", data)`` replies with ``env_is_wrapped(data)``;
       * ``("render", mode)`` renders, replying with the result only for
         ``"rgb_array"``;
       * ``"terminate"`` returns without replying.

    3. Any exception (including ``BaseException`` subclasses) ends the loop
       and is sent back as ``(exception, formatted_traceback)``.

    Args:
        vec_env_constr: Zero-argument callable creating the vector env.
        inpt_p: Pipe end that is closed immediately in this function.
        pipe: Pipe end used for all communication.
        shared_obs: Passed through to ``write_observations``.
        shared_rews: Object whose ``np_arr`` receives the rewards.
        shared_dones: Object whose ``np_arr`` receives the done flags.
    """
    inpt_p.close()
    try:
        vec_env = vec_env_constr()

        pipe.send((vec_env.num_envs))
        env_start_idx = pipe.recv()
        env_end_idx = env_start_idx + vec_env.num_envs
        while True:
            instr = pipe.recv()
            comp_infos = []

            if instr == "close":
                vec_env.close()

            elif isinstance(instr, tuple):
                name, data = instr
                if name == "reset":
                    if not data[1]:
                        observations = vec_env.reset(seed=data[0],
                                                     options=data[2])
                    else:
                        observations, infos = vec_env.reset(
                            seed=data[0],
                            return_info=data[1],
                            options=data[2])
                        comp_infos = compress_info(infos)

                    write_observations(vec_env, env_start_idx, shared_obs,
                                       observations)
                    shared_dones.np_arr[env_start_idx:env_end_idx] = False
                    shared_rews.np_arr[env_start_idx:env_end_idx] = 0.0
                elif name == "step":
                    actions = data
                    actions = concatenate(
                        vec_env.action_space,
                        actions,
                        create_empty_array(vec_env.action_space,
                                           n=len(actions)),
                    )
                    observations, rewards, dones, infos = vec_env.step(
                        actions)
                    write_observations(vec_env, env_start_idx, shared_obs,
                                       observations)
                    shared_dones.np_arr[env_start_idx:env_end_idx] = dones
                    shared_rews.np_arr[env_start_idx:env_end_idx] = rewards
                    comp_infos = compress_info(infos)
                elif name == "env_is_wrapped":
                    comp_infos = vec_env.env_is_wrapped(data)
                elif name == "render":
                    render_result = vec_env.render(data)
                    if data == "rgb_array":
                        comp_infos = render_result
                else:
                    # Formatting (rather than ``+``) keeps this an
                    # AssertionError even when ``name`` is not a string.
                    raise AssertionError(
                        f"bad tuple instruction name: {name}")
            elif instr == "terminate":
                return
            else:
                # Same reasoning: ``"..." + instr`` would raise TypeError
                # for a non-string instruction and hide the real problem.
                raise AssertionError(f"bad instruction: {instr}")

            pipe.send(comp_infos)
    except BaseException as e:
        tb = traceback.format_exc()
        pipe.send((e, tb))