コード例 #1
0
    def __init__(self,
                 env_fns,
                 observation_space=None,
                 action_space=None,
                 copy=True):
        """Build every sub-environment and allocate the batched buffers.

        Args:
            env_fns: Iterable of zero-argument callables; each one is called
                exactly once, in order, to create a sub-environment.
            observation_space: Observation space of a single environment.
                When either space is ``None``, a missing/falsy space is taken
                from the first sub-environment.
            action_space: Action space of a single environment (same
                fallback rule as ``observation_space``).
            copy: Stored on ``self.copy``; not otherwise used here.
        """
        self.env_fns = env_fns
        self.envs = [env_fn() for env_fn in env_fns]
        self.copy = copy

        if (observation_space is None) or (action_space is None):
            observation_space = observation_space or self.envs[
                0].observation_space
            action_space = action_space or self.envs[0].action_space
        # Count the environments that were actually built rather than
        # ``env_fns``: the latter may be a generator or another iterable
        # without ``__len__`` that has already been consumed above.
        super(SyncVectorEnv, self).__init__(
            num_envs=len(self.envs),
            observation_space=observation_space,
            action_space=action_space,
        )

        self._check_observation_spaces()
        # Zero-initialised per-environment buffers.
        self.observations = create_empty_array(self.single_observation_space,
                                               n=self.num_envs,
                                               fn=np.zeros)
        self._rewards = np.zeros((self.num_envs, ), dtype=np.float64)
        self._dones = np.zeros((self.num_envs, ), dtype=np.bool_)
        self._actions = None
コード例 #2
0
ファイル: mw_sync_env.py プロジェクト: joncrawf/mime
    def __init__(
            self,
            num_envs,
            observation_space,
            action_space,
            copy=True):
        """Allocate batched buffers for ``num_envs`` environments.

        This constructor builds no sub-environments and performs no
        observation-space check; the environment count and both spaces must
        be supplied by the caller.

        Args:
            num_envs: Number of environments, forwarded to the parent class.
            observation_space: Observation space forwarded to the parent.
            action_space: Action space forwarded to the parent.
            copy: Stored on ``self.copy``; not otherwise used here.
        """
        self.copy = copy

        super(SyncVectorEnv_, self).__init__(
            num_envs=num_envs,
            observation_space=observation_space,
            action_space=action_space,
        )

        # Zero-initialised per-environment buffers.
        self.observations = create_empty_array(
            self.single_observation_space,
            n=self.num_envs,
            fn=np.zeros,
        )
        per_env_shape = (self.num_envs, )
        self._rewards = np.zeros(per_env_shape, dtype=np.float64)
        self._dones = np.zeros(per_env_shape, dtype=np.bool_)
        self._actions = None
コード例 #3
0
ファイル: sync_vector_env.py プロジェクト: imhgchoi/MAML-RL
    def step_wait(self):
        """Step every sub-environment whose done flag is not yet set.

        Actions in ``self._actions`` are consumed in order, one for each
        environment whose entry in ``self._dones`` is false on entry.
        Environments already marked done are skipped.

        Returns:
            A tuple ``(observations, rewards, dones, info)`` where

            * ``observations`` batches the observations of the environments
              that are still not done after this step, or is ``None`` when
              there are none;
            * ``rewards`` is a ``float64`` array with one entry per action;
            * ``dones`` is a copy of ``self._dones`` after the step;
            * ``info`` is a dict with ``'batch_ids'`` (indices of the
              environments that were stepped) and ``'infos'`` (the info
              objects of the environments still running).

        Raises:
            AssertionError: If the number of stepped environments differs
                from the number of actions.
        """
        observations_list, infos = [], []
        batch_ids, j = [], 0
        num_actions = len(self._actions)
        # ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the same
        # dtype and exists in every NumPy version.
        rewards = np.zeros((num_actions, ), dtype=np.float64)
        for i, env in enumerate(self.envs):
            if self._dones[i]:
                continue

            action = self._actions[j]
            observation, rewards[j], self._dones[i], info = env.step(action)
            batch_ids.append(i)

            # Only environments that are still running contribute an
            # observation and an info entry to the returned batch.
            if not self._dones[i]:
                observations_list.append(observation)
                infos.append(info)
            j += 1
        assert num_actions == j

        if observations_list:
            observations = create_empty_array(self.single_observation_space,
                                              n=len(observations_list),
                                              fn=np.zeros)
            concatenate(observations_list, observations,
                        self.single_observation_space)
        else:
            observations = None

        return (observations, rewards, np.copy(self._dones), {
            'batch_ids': batch_ids,
            'infos': infos
        })
コード例 #4
0
    def __init__(self,
                 envs,
                 observation_space=None,
                 action_space=None,
                 copy=True):
        """Wrap already-constructed environments and allocate buffers.

        Args:
            envs: Indexable collection of environment instances (used as-is,
                not called).
            observation_space: Observation space of a single environment.
                When either space is ``None``, a missing/falsy space is taken
                from ``envs[0]``.
            action_space: Action space of a single environment (same
                fallback rule as ``observation_space``).
            copy: Stored on ``self.copy``; not otherwise used here.
        """
        self.envs = envs
        self.copy = copy

        if observation_space is None or action_space is None:
            first_env = self.envs[0]
            observation_space = (observation_space
                                 or first_env.observation_space)
            action_space = action_space or first_env.action_space

        super(AsyncioVectorEnv, self).__init__(
            num_envs=len(self.envs),
            observation_space=observation_space,
            action_space=action_space,
        )

        self._check_observation_spaces()

        # Zero-initialised per-environment buffers.
        self.observations = create_empty_array(
            self.single_observation_space,
            n=self.num_envs,
            fn=np.zeros,
        )
        per_env_shape = (self.num_envs, )
        self._rewards = np.zeros(per_env_shape, dtype=np.float64)
        self._dones = np.zeros(per_env_shape, dtype=np.bool_)
        self._actions = None

        self.closed = False
        # Event loop as returned by asyncio at construction time.
        self.loop = asyncio.get_event_loop()
コード例 #5
0
    def __init__(self,
                 env_fns,
                 observation_space=None,
                 action_space=None,
                 shared_memory=True,
                 copy=True,
                 context=None):
        """Start one daemon worker process per environment factory.

        Args:
            env_fns: Sequence of zero-argument callables, one per worker.
            observation_space: Observation space of a single environment.
                When either space is ``None``, a temporary environment is
                built from ``env_fns[0]`` to supply the missing one(s) and is
                closed again straight away.
            action_space: Action space of a single environment (same
                fallback rule as ``observation_space``).
            shared_memory: If true, the observation buffer is created with
                ``create_shared_memory`` and handed to the workers; otherwise
                a plain zero-filled array is allocated and workers get
                ``None`` as buffer.
            copy: Stored on ``self.copy``; not otherwise used here.
            context: Passed to ``mp.get_context``.
        """
        try:
            ctx = mp.get_context(context)
        except AttributeError:
            # ``get_context`` is missing: fall back to the module itself.
            logger.warn('Context switching for `multiprocessing` is not '
                        'available in Python 2. Using the default context.')
            ctx = mp

        self.env_fns = env_fns
        self.shared_memory = shared_memory
        self.copy = copy

        if observation_space is None or action_space is None:
            probe_env = env_fns[0]()
            observation_space = (observation_space
                                 or probe_env.observation_space)
            action_space = action_space or probe_env.action_space
            probe_env.close()
            del probe_env

        super(AsyncVectorEnv, self).__init__(
            num_envs=len(env_fns),
            observation_space=observation_space,
            action_space=action_space,
        )

        if not self.shared_memory:
            shared_buffer = None
            self.observations = create_empty_array(
                self.single_observation_space, n=self.num_envs, fn=np.zeros)
        else:
            shared_buffer = create_shared_memory(
                self.single_observation_space, n=self.num_envs)
            self.observations = read_from_shared_memory(
                shared_buffer, self.single_observation_space, n=self.num_envs)

        self.parent_pipes = []
        self.processes = []
        self.error_queue = ctx.Queue()

        if self.shared_memory:
            target = _worker_shared_memory
        else:
            target = _worker

        with clear_mpi_env_vars():
            for idx, env_fn in enumerate(self.env_fns):
                parent_pipe, child_pipe = ctx.Pipe()
                worker_args = (idx, CloudpickleWrapper(env_fn), child_pipe,
                               parent_pipe, shared_buffer, self.error_queue)
                process = ctx.Process(
                    target=target,
                    name='Worker<{0}>-{1}'.format(type(self).__name__, idx),
                    args=worker_args,
                )

                self.parent_pipes.append(parent_pipe)
                self.processes.append(process)

                process.daemon = True
                process.start()
                # The child end is closed in this process once the worker
                # has been started.
                child_pipe.close()

        self._state = AsyncState.DEFAULT
        self._check_observation_spaces()
コード例 #6
0
 def __init__(self, env: Env) -> None:
     """Wrap a single, non-vector ``env`` as a vector environment of size 1.

     Args:
         env: The environment to wrap; must not be a ``VectorEnv``.
     """
     assert not isinstance(env, VectorEnv)

     single_obs_space = env.observation_space
     single_act_space = env.action_space
     super().__init__(1, single_obs_space, single_act_space)

     # Replace the action space with its batched (n=1) counterpart.
     self.action_space = batch_space(single_act_space, 1)
     self.env = env

     # Length-1 buffers for the wrapped environment.
     self._observation = create_empty_array(
         single_obs_space, n=1, fn=np.zeros
     )
     self._reward = np.zeros((1,), dtype=np.float64)
     self._terminal = np.zeros((1,), dtype=np.bool_)
     self._action = None
コード例 #7
0
 def concat_obs(self, observations):
     """Flatten several batched observations into one batch.

     Every element of ``observations`` is split into its individual items
     with ``iterate``; all items, in order, are then written with
     ``concatenate`` into a fresh array sized for ``self.num_envs``.
     """
     space = self.observation_space
     flat_items = []
     for batch in observations:
         flat_items.extend(iterate(space, batch))
     out = create_empty_array(space, n=self.num_envs)
     return concatenate(space, flat_items, out)
コード例 #8
0
ファイル: async_vector_env.py プロジェクト: InbalWaiss/FPS
    def __init__(self, env_fns, observation_space=None, action_space=None,
                 shared_memory=True, copy=True, context=None, daemon=True, worker=None):
        """Start one worker process per environment factory.

        Args:
            env_fns: Sequence of zero-argument callables, one per worker.
            observation_space: Observation space of a single environment.
                When either space is ``None``, a temporary environment is
                built from ``env_fns[0]`` to supply the missing one(s) and is
                closed again straight away.
            action_space: Action space of a single environment (same
                fallback rule as ``observation_space``).
            shared_memory: If true, the observation buffer is created with
                ``create_shared_memory`` and handed to the workers; otherwise
                a plain zero-filled array is allocated and workers get
                ``None`` as buffer.
            copy: Stored on ``self.copy``; not otherwise used here.
            context: Passed to ``mp.get_context``.
            daemon: Value assigned to each worker process' ``daemon`` flag.
            worker: Optional process target; when truthy it replaces the
                default worker function.

        Raises:
            ValueError: If ``shared_memory`` is true and setting up the
                shared buffer raises ``CustomSpaceError``.
        """
        ctx = mp.get_context(context)
        self.env_fns = env_fns
        self.shared_memory = shared_memory
        self.copy = copy

        if observation_space is None or action_space is None:
            probe_env = env_fns[0]()
            observation_space = (observation_space
                                 or probe_env.observation_space)
            action_space = action_space or probe_env.action_space
            probe_env.close()
            del probe_env

        super(AsyncVectorEnv, self).__init__(
            num_envs=len(env_fns),
            observation_space=observation_space,
            action_space=action_space,
        )

        if not self.shared_memory:
            shared_buffer = None
            self.observations = create_empty_array(
                self.single_observation_space, n=self.num_envs, fn=np.zeros)
        else:
            try:
                shared_buffer = create_shared_memory(
                    self.single_observation_space, n=self.num_envs, ctx=ctx)
                self.observations = read_from_shared_memory(
                    shared_buffer,
                    self.single_observation_space,
                    n=self.num_envs)
            except CustomSpaceError:
                raise ValueError('Using `shared_memory=True` in `AsyncVectorEnv` '
                    'is incompatible with non-standard Gym observation spaces '
                    '(i.e. custom spaces inheriting from `gym.Space`), and is '
                    'only compatible with default Gym spaces (e.g. `Box`, '
                    '`Tuple`, `Dict`) for batching. Set `shared_memory=False` '
                    'if you use custom observation spaces.')

        self.parent_pipes = []
        self.processes = []
        self.error_queue = ctx.Queue()

        if self.shared_memory:
            target = _worker_shared_memory
        else:
            target = _worker
        if worker:
            # A caller-supplied worker takes precedence over the default.
            target = worker

        with clear_mpi_env_vars():
            for idx, env_fn in enumerate(self.env_fns):
                parent_pipe, child_pipe = ctx.Pipe()
                worker_args = (idx, CloudpickleWrapper(env_fn), child_pipe,
                               parent_pipe, shared_buffer, self.error_queue)
                process = ctx.Process(
                    target=target,
                    name='Worker<{0}>-{1}'.format(type(self).__name__, idx),
                    args=worker_args,
                )

                self.parent_pipes.append(parent_pipe)
                self.processes.append(process)

                process.daemon = daemon
                process.start()
                # The child end is closed in this process once the worker
                # has been started.
                child_pipe.close()

        self._state = AsyncState.DEFAULT
        self._check_observation_spaces()
コード例 #9
0
    def concat_obs(self, obs_dict):
        """Batch the per-agent observations in ``possible_agents`` order.

        Args:
            obs_dict: Mapping from agent to that agent's observation.

        Returns:
            The result of ``concatenate`` over the ordered observations,
            written into a fresh array sized for ``self.num_envs``.

        Raises:
            AssertionError: If any agent listed in
                ``self.par_env.possible_agents`` is missing from ``obs_dict``.
        """
        ordered_obs = []
        for agent in self.par_env.possible_agents:
            if agent in obs_dict:
                ordered_obs.append(obs_dict[agent])
                continue
            raise AssertionError(
                "environment has agent death. Not allowed for pettingzoo_env_to_vec_env_v1 unless black_death is True"
            )

        space = self.observation_space
        return concatenate(
            space,
            ordered_obs,
            create_empty_array(space, self.num_envs),
        )
コード例 #10
0
def fuse_and_batch(item_space: spaces.Space, *sequences: Sequence[Sequence[T]],
                   n_items: int) -> Sequence[T]:
    """Fuse several lists of (batches of) items into one batch of ``n_items``.

    Every value of every sequence is reshaped to ``(-1, *item_space.shape)``
    and all of them are joined along the first axis. The joined array is then
    cut into ``n_items`` parts with ``np.split``, each part is reshaped to
    ``item_space.shape``, and the parts are written into a fresh array with
    ``concatenate``.

    Args:
        item_space: Space describing a single item.
        *sequences: Lists whose values are items or batches of items.
        n_items: Total number of items expected across all sequences.

    Returns:
        Whatever ``concatenate`` returns for the collected items.

    Raises:
        AssertionError: If any of ``sequences`` is not a ``list``.
    """
    assert all(isinstance(sequence, list) for sequence in sequences)
    out = create_empty_array(item_space, n=n_items)

    # Stack everything into one flat batch of samples.
    stacked = [
        np.asarray(value).reshape([-1, *item_space.shape])
        for value in itertools.chain.from_iterable(sequences)
    ]
    items_batch = np.concatenate(stacked)

    # Cut the flat batch back into one array per item.
    items = []
    for part in np.split(items_batch, n_items):
        items.append(part.reshape(item_space.shape))

    # TODO: Need to add more tests to make sure this works with custom spaces and Dict spaces.
    return concatenate(items, out, item_space)
コード例 #11
0
def dict_vec_env_test(env):
    """Run 55 random steps on ``env`` and check its dict observations.

    After every step this asserts that
    ``obss["feature"][1][0] == 1``, that the observation rebuilt from index 1
    of the ``"feature"`` and ``"id"`` entries lies in
    ``env.observation_space``, and that the done flags are either all set or
    all unset.
    """
    env.reset()
    for _ in range(55):
        sampled = [env.action_space.sample() for _ in range(env.num_envs)]
        actions = concatenate(
            env.action_space,
            sampled,
            create_empty_array(env.action_space, env.num_envs),
        )
        obss, rews, dones, infos = env.step(actions)

        assert obss["feature"][1][0] == 1
        second_obs = {
            "feature": obss["feature"][1][:],
            "id": [o[1] for o in obss["id"]],
        }
        assert second_obs in env.observation_space

        # no agent death, only env death
        if any(dones):
            assert all(dones)
コード例 #12
0
    def __init__(
        self,
        env_fns: Iterator[callable],
        observation_space: Space = None,
        action_space: Space = None,
        copy: bool = True,
    ):
        """Create a vectorized environment that steps its sub-environments one after another.

        Args:
            env_fns: Iterable of callables, each of which builds one sub-environment.
            observation_space: Observation space of a single environment. Defaults to the observation space of the first sub-environment when ``None``.
            action_space: Action space of a single environment. Defaults to the action space of the first sub-environment when ``None``.
            copy: When ``True``, :meth:`reset` and :meth:`step` hand back a copy of the observations.

        Raises:
            RuntimeError: If a sub-environment's observation space differs from observation_space (or from the first sub-environment's observation space when none is given).
        """
        self.env_fns = env_fns
        self.envs = [make_env() for make_env in env_fns]
        self.copy = copy

        first_env = self.envs[0]
        self.metadata = first_env.metadata

        if observation_space is None or action_space is None:
            observation_space = (observation_space
                                 or first_env.observation_space)
            action_space = action_space or first_env.action_space

        super().__init__(
            num_envs=len(self.envs),
            observation_space=observation_space,
            action_space=action_space,
        )

        self._check_spaces()

        # Zero-initialised per-environment buffers.
        self.observations = create_empty_array(
            self.single_observation_space,
            n=self.num_envs,
            fn=np.zeros,
        )
        per_env_shape = (self.num_envs, )
        self._rewards = np.zeros(per_env_shape, dtype=np.float64)
        self._dones = np.zeros(per_env_shape, dtype=np.bool_)
        self._actions = None
コード例 #13
0
    def __init__(
        self,
        env_fns: Sequence[callable],
        observation_space: Optional[gym.Space] = None,
        action_space: Optional[gym.Space] = None,
        shared_memory: bool = True,
        copy: bool = True,
        context: Optional[str] = None,
        daemon: bool = True,
        worker: Optional[callable] = None,
    ):
        """Vectorized environment that runs multiple environments in parallel.

        A temporary environment is always built from ``env_fns[0]`` to read
        its ``metadata`` (and, if needed, its spaces); it is closed again
        before any worker process is started.

        Args:
            env_fns: Functions that create the environments.
            observation_space: Observation space of a single environment. If ``None``, then the observation space of the first environment is taken.
            action_space: Action space of a single environment. If ``None``, then the action space of the first environment is taken.
            shared_memory: If ``True``, then the observations from the worker processes are communicated back through shared variables. This can improve the efficiency if the observations are large (e.g. images).
            copy: If ``True``, then the :meth:`~AsyncVectorEnv.reset` and :meth:`~AsyncVectorEnv.step` methods return a copy of the observations.
            context: Context for `multiprocessing`_. If ``None``, then the default context is used.
            daemon: If ``True``, then subprocesses have ``daemon`` flag turned on; that is, they will quit if the head process quits. However, ``daemon=True`` prevents subprocesses to spawn children, so for some environments you may want to have it set to ``False``.
            worker: If set, then use that worker in a subprocess instead of a default one. Can be useful to override some inner vector env logic, for instance, how resets on done are handled.

        Warnings: worker is an advanced mode option. It provides a high degree of flexibility and a high chance to shoot yourself in the foot; thus, if you are writing your own worker, it is recommended to start from the code for ``_worker`` (or ``_worker_shared_memory``) method, and add changes.

        Raises:
            RuntimeError: If the observation space of some sub-environment does not match observation_space (or, by default, the observation space of the first sub-environment).
            ValueError: If observation_space is a custom space (i.e. not a default space in Gym, such as gym.spaces.Box, gym.spaces.Discrete, or gym.spaces.Dict) and shared_memory is True.
        """
        ctx = mp.get_context(context)
        self.env_fns = env_fns
        self.shared_memory = shared_memory
        self.copy = copy
        dummy_env = env_fns[0]()
        try:
            self.metadata = dummy_env.metadata

            if (observation_space is None) or (action_space is None):
                observation_space = observation_space or dummy_env.observation_space
                action_space = action_space or dummy_env.action_space
        finally:
            # Always release the probe environment, even when reading its
            # attributes fails, so it is not leaked.
            dummy_env.close()
        del dummy_env
        super().__init__(
            num_envs=len(env_fns),
            observation_space=observation_space,
            action_space=action_space,
        )

        if self.shared_memory:
            try:
                _obs_buffer = create_shared_memory(
                    self.single_observation_space, n=self.num_envs, ctx=ctx)
                self.observations = read_from_shared_memory(
                    self.single_observation_space,
                    _obs_buffer,
                    n=self.num_envs)
            except CustomSpaceError as err:
                # Chain explicitly so the original cause stays attached to
                # the user-facing error.
                raise ValueError(
                    "Using `shared_memory=True` in `AsyncVectorEnv` "
                    "is incompatible with non-standard Gym observation spaces "
                    "(i.e. custom spaces inheriting from `gym.Space`), and is "
                    "only compatible with default Gym spaces (e.g. `Box`, "
                    "`Tuple`, `Dict`) for batching. Set `shared_memory=False` "
                    "if you use custom observation spaces.") from err
        else:
            _obs_buffer = None
            self.observations = create_empty_array(
                self.single_observation_space, n=self.num_envs, fn=np.zeros)

        self.parent_pipes, self.processes = [], []
        self.error_queue = ctx.Queue()
        target = _worker_shared_memory if self.shared_memory else _worker
        # A caller-supplied worker takes precedence over the default.
        target = worker or target
        with clear_mpi_env_vars():
            for idx, env_fn in enumerate(self.env_fns):
                parent_pipe, child_pipe = ctx.Pipe()
                process = ctx.Process(
                    target=target,
                    name=f"Worker<{type(self).__name__}>-{idx}",
                    args=(
                        idx,
                        CloudpickleWrapper(env_fn),
                        child_pipe,
                        parent_pipe,
                        _obs_buffer,
                        self.error_queue,
                    ),
                )

                self.parent_pipes.append(parent_pipe)
                self.processes.append(process)

                process.daemon = daemon
                process.start()
                # The child end is closed in this process once the worker
                # has been started.
                child_pipe.close()

        self._state = AsyncState.DEFAULT
        self._check_spaces()
コード例 #14
0
 def concatenate_actions(self, actions, n_actions):
     """Batch ``actions`` into a fresh array sized for ``n_actions`` entries.

     The output array is allocated from ``self.action_space`` and filled by
     ``concatenate``, whose result is returned.
     """
     space = self.action_space
     out = create_empty_array(space, n=n_actions)
     return concatenate(space, actions, out)
コード例 #15
0
def async_loop(vec_env_constr, inpt_p, pipe, shared_obs, shared_rews,
               shared_dones):
    """Build a vector env and serve instructions received over ``pipe``.

    Start-up: ``inpt_p`` is closed, the environment is built with
    ``vec_env_constr()``, its ``num_envs`` is sent over ``pipe`` and the
    index of this worker's first slot in the shared buffers is received.

    Main loop: each received instruction is handled and then answered with
    one ``pipe.send`` (an empty list unless stated otherwise):

    * ``"close"``: closes the environment; the loop keeps running.
    * ``"terminate"``: returns without sending a reply.
    * ``("reset", data)``: resets with ``seed=data[0]`` and
      ``options=data[2]``; when ``data[1]`` is truthy the reset is asked to
      return infos, which are sent back compressed. Observations are written
      to ``shared_obs`` and this worker's reward/done slots are cleared.
    * ``("step", actions)``: batches the actions, steps the environment,
      writes observations, rewards and dones to the shared buffers and
      replies with the compressed infos.
    * ``("env_is_wrapped", data)``: replies with
      ``vec_env.env_is_wrapped(data)``.
    * ``("render", data)``: renders; the result is sent back only when
      ``data == "rgb_array"``.

    Any exception, including the ``AssertionError`` raised for an unknown
    instruction, is sent over ``pipe`` as ``(exception, traceback_text)``
    and ends the loop.

    Args:
        vec_env_constr: Zero-argument callable returning the vector env.
        inpt_p: Object with a ``close()`` method, closed immediately
            (presumably the other end of ``pipe``; confirm with the caller).
        pipe: Connection used for ``recv``/``send`` with the controller.
        shared_obs: Passed through to ``write_observations``.
        shared_rews: Object whose ``np_arr`` receives the rewards.
        shared_dones: Object whose ``np_arr`` receives the done flags.
    """
    inpt_p.close()
    try:
        vec_env = vec_env_constr()

        pipe.send(vec_env.num_envs)
        env_start_idx = pipe.recv()
        env_end_idx = env_start_idx + vec_env.num_envs
        while True:
            instr = pipe.recv()
            comp_infos = []

            if instr == "close":
                vec_env.close()

            elif isinstance(instr, tuple):
                name, data = instr

                if name == "reset":
                    if not data[1]:
                        observations = vec_env.reset(seed=data[0],
                                                     options=data[2])
                    else:
                        observations, infos = vec_env.reset(
                            seed=data[0], return_info=data[1], options=data[2])
                        comp_infos = compress_info(infos)

                    write_observations(vec_env, env_start_idx, shared_obs,
                                       observations)
                    shared_dones.np_arr[env_start_idx:env_end_idx] = False
                    shared_rews.np_arr[env_start_idx:env_end_idx] = 0.0

                elif name == "step":
                    actions = data
                    actions = concatenate(
                        vec_env.action_space,
                        actions,
                        create_empty_array(vec_env.action_space,
                                           n=len(actions)),
                    )
                    observations, rewards, dones, infos = vec_env.step(actions)
                    write_observations(vec_env, env_start_idx, shared_obs,
                                       observations)
                    shared_dones.np_arr[env_start_idx:env_end_idx] = dones
                    shared_rews.np_arr[env_start_idx:env_end_idx] = rewards
                    comp_infos = compress_info(infos)

                elif name == "env_is_wrapped":
                    comp_infos = vec_env.env_is_wrapped(data)

                elif name == "render":
                    render_result = vec_env.render(data)
                    if data == "rgb_array":
                        comp_infos = render_result

                else:
                    # str() keeps the message identical for string names and
                    # avoids a TypeError masking this error for other types.
                    raise AssertionError("bad tuple instruction name: " +
                                         str(name))
            elif instr == "terminate":
                return
            else:
                # Same reasoning as above: the instruction may not be a str.
                raise AssertionError("bad instruction: " + str(instr))
            pipe.send(comp_infos)
    except BaseException as e:
        # Report every failure (including KeyboardInterrupt/SystemExit) to
        # the controller instead of dying silently.
        tb = traceback.format_exc()
        pipe.send((e, tb))