Esempio n. 1
0
    def reset_wait(self, timeout=None):
        """Collect the observations produced by a pending `reset_async` call.

        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `reset_wait` times out. If
            `None`, the call to `reset_wait` never times out.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment. A deep
            copy is returned when `self.copy` is set, otherwise the internal
            buffer itself.

        Raises
        ------
        NoAsyncCallError
            If the state is not `AsyncState.WAITING_RESET`.
        mp.TimeoutError
            If `self._poll(timeout)` reports that the results are not ready.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_RESET:
            raise NoAsyncCallError(
                'Calling `reset_wait` without any prior '
                'call to `reset_async`.',
                AsyncState.WAITING_RESET.value)

        ready = self._poll(timeout)
        if not ready:
            # Leave the waiting state before reporting the timeout.
            self._state = AsyncState.DEFAULT
            plural = 's' if timeout > 1 else ''
            message = ('The call to `reset_wait` has timed out after '
                       '{0} second{1}.').format(timeout, plural)
            raise mp.TimeoutError(message)

        # Each pipe yields a (result, success) pair.
        replies = [pipe.recv() for pipe in self.parent_pipes]
        results, successes = zip(*replies)
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        if not self.shared_memory:
            concatenate(results, self.observations, self.single_observation_space)

        if self.copy:
            return deepcopy(self.observations)
        return self.observations
Esempio n. 2
0
    def reset_wait(
        self,
        timeout: Optional[Union[int, float]] = None,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ) -> Union[ObsType, Tuple[ObsType, List[dict]]]:
        """Wait for the reset started by :meth:`reset_async` and return its results.

        Args:
            timeout: Number of seconds before the call to `reset_wait` times out. If `None`, the call to `reset_wait` never times out.
            seed: ignored
            return_info: Whether the aggregated reset infos are returned as well
            options: ignored

        Returns:
            The batched observations, or a tuple of the batched observations and
            the infos aggregated through ``self._add_info`` when ``return_info`` is set.

        Raises:
            NoAsyncCallError: If the state is not ``AsyncState.WAITING_RESET``.
            TimeoutError: If ``self._poll(timeout)`` reports that results are not ready.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_RESET:
            raise NoAsyncCallError(
                "Calling `reset_wait` without any prior call to `reset_async`.",
                AsyncState.WAITING_RESET.value,
            )

        timed_out = not self._poll(timeout)
        if timed_out:
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                f"The call to `reset_wait` has timed out after {timeout} second(s)."
            )

        # Each pipe yields a (result, success) pair.
        replies = [pipe.recv() for pipe in self.parent_pipes]
        results, successes = zip(*replies)
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        infos = {}
        if return_info:
            # With infos requested every result is an (observation, info) pair.
            results, info_data = zip(*results)
            for index, info in enumerate(info_data):
                infos = self._add_info(infos, info, index)

        if not self.shared_memory:
            self.observations = concatenate(
                self.single_observation_space, results, self.observations
            )

        batch = deepcopy(self.observations) if self.copy else self.observations
        if return_info:
            return batch, infos
        return batch
Esempio n. 3
0
    def step_wait(self):
        """Step every sub-environment that is not already done.

        Sub-environments whose entry in ``self._dones`` is set are skipped;
        the remaining ones consume ``self._actions`` in order, one action each.
        Observations and infos are only kept for sub-environments that are
        still running after the step.

        Returns
        -------
        observations : batched observations or None
            Batch of the observations of the sub-environments still running,
            or ``None`` when no such observation was collected.
        rewards : np.ndarray
            One ``float64`` reward per consumed action.
        dones : np.ndarray
            Copy of ``self._dones`` after the step.
        info : dict
            ``'batch_ids'`` holds the indices of the stepped sub-environments
            and ``'infos'`` the infos of those still running.

        Raises
        ------
        AssertionError
            If the number of actions differs from the number of stepped
            sub-environments.
        """
        observations_list, infos = [], []
        batch_ids, j = [], 0
        num_actions = len(self._actions)
        # `np.float_` was removed in NumPy 2.0; `np.float64` is the same dtype
        # and exists in every NumPy version.
        rewards = np.zeros((num_actions, ), dtype=np.float64)
        for i, env in enumerate(self.envs):
            if self._dones[i]:
                continue

            action = self._actions[j]
            observation, rewards[j], self._dones[i], info = env.step(action)
            batch_ids.append(i)

            # A sub-environment that just finished contributes no observation.
            if not self._dones[i]:
                observations_list.append(observation)
                infos.append(info)
            j += 1
        assert num_actions == j

        if observations_list:
            observations = create_empty_array(self.single_observation_space,
                                              n=len(observations_list),
                                              fn=np.zeros)
            concatenate(observations_list, observations,
                        self.single_observation_space)
        else:
            observations = None

        return (observations, rewards, np.copy(self._dones), {
            'batch_ids': batch_ids,
            'infos': infos
        })
Esempio n. 4
0
    def step(self, actions):
        """Step each sub-environment with its action and batch the results.

        A sub-environment whose step reports done is reset immediately, and
        the observation from that reset replaces the one from the step.

        Parameters
        ----------
        actions : iterable of samples from `action_space`
            List of actions.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.

        rewards : `np.ndarray` instance (dtype `np.float_`)
            A vector of rewards from the vectorized environment.

        dones : `np.ndarray` instance (dtype `np.bool_`)
            A vector whose entries indicate whether the episode has ended.

        infos : list of dict
            A list of auxiliary diagnostic informations.
        """
        collected_obs = []
        collected_infos = []
        for index, (env, action) in enumerate(zip(self.envs, actions)):
            transition = env.step(action)
            obs, self._rewards[index], self._dones[index], info = transition
            if self._dones[index]:
                obs = env.reset()
            collected_obs.append(obs)
            collected_infos.append(info)

        concatenate(collected_obs, self.observations,
                    self.single_observation_space)

        batch = deepcopy(self.observations) if self.copy else self.observations
        rewards = np.copy(self._rewards)
        dones = np.copy(self._dones)
        return (batch, rewards, dones, collected_infos)
Esempio n. 5
0
    def reset_wait(self):
        """Reset every sub-environment and return the batched observations.

        All done flags are cleared first. The result is a copy of the internal
        buffer when `self.copy` is set, otherwise the buffer itself.
        """
        self._dones[:] = False
        fresh = [env.reset() for env in self.envs]
        concatenate(fresh, self.observations, self.single_observation_space)

        if self.copy:
            return np.copy(self.observations)
        return self.observations
    async def async_reset(self):
        """Reset all sub-environments concurrently and batch their observations.

        If batching fails with a ValueError, the shapes of the individual
        observations are printed before the error is re-raised.
        """
        self._dones[:] = False
        pending = [call_wrapped_async_reset(env) for env in self.envs]
        observations = await asyncio.gather(*pending)

        try:
            concatenate(observations, self.observations,
                        self.single_observation_space)
        except ValueError:
            shapes = [n.shape for n in observations]
            print(shapes)
            raise

        if self.copy:
            return np.copy(self.observations)
        return self.observations
Esempio n. 7
0
    def step_wait(self):
        """Apply the stored actions to the sub-environments and batch the results.

        A sub-environment whose step reports done is reset immediately, and
        the observation from that reset replaces the one from the step.

        Returns a tuple ``(observations, rewards, dones, infos)``; rewards and
        dones are copies of the internal arrays.
        """
        collected_obs = []
        collected_infos = []
        for index, (env, action) in enumerate(zip(self.envs, self._actions)):
            transition = env.step(action)
            obs, self._rewards[index], self._dones[index], info = transition
            if self._dones[index]:
                obs = env.reset()
            collected_obs.append(obs)
            collected_infos.append(info)

        concatenate(collected_obs, self.observations,
                    self.single_observation_space)

        batch = deepcopy(self.observations) if self.copy else self.observations
        rewards = np.copy(self._rewards)
        dones = np.copy(self._dones)
        return (batch, rewards, dones, collected_infos)
 def step_wait(self, **kwargs) -> Tuple[
     Any, np.ndarray, np.ndarray, Tuple[Dict[str, Any]]]:
     """Step the single wrapped environment with the stored action.

     The environment is reset right away when the step reports a terminal
     state, and the observation from that reset is the one returned. Extra
     keyword arguments are accepted but unused.

     Returns the batched observation, copies of the reward and terminal
     arrays, and a one-element tuple holding the info dict.
     """
     transition = self.env.step(self._action)
     observation, self._reward[0], self._terminal[0], info = transition
     if self._terminal[0]:
         observation = self.env.reset()

     batch = concatenate([observation], self._observation, self.single_observation_space)
     self._observation = batch

     rewards = np.copy(self._reward)
     terminals = np.copy(self._terminal)
     return self._observation, rewards, terminals, (info,)
Esempio n. 9
0
    def reset(self):
        """Reset every sub-environment and clear all done flags.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment; a copy
            of the internal buffer when `self.copy` is set.
        """
        self._dones[:] = False
        fresh = [env.reset() for env in self.envs]
        concatenate(fresh, self.observations, self.single_observation_space)

        if self.copy:
            return np.copy(self.observations)
        return self.observations
Esempio n. 10
0
    def step_wait(
        self,
        timeout: Optional[Union[int, float]] = None
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[dict]]:
        """Wait for the calls to :obj:`step` in each sub-environment to finish.

        Args:
            timeout: Number of seconds before the call to :meth:`step_wait` times out. If ``None``, the call to :meth:`step_wait` never times out.

        Returns:
             The batched environment step information, obs, reward, done and info

        Raises:
            NoAsyncCallError: If :meth:`step_wait` was called without any prior call to :meth:`step_async`.
            TimeoutError: If :meth:`step_wait` timed out.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_STEP:
            raise NoAsyncCallError(
                "Calling `step_wait` without any prior call "
                "to `step_async`.",
                AsyncState.WAITING_STEP.value,
            )

        if not self._poll(timeout):
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                f"The call to `step_wait` has timed out after {timeout} second(s)."
            )

        observations_list, rewards, dones, infos = [], [], [], {}
        successes = []
        for i, pipe in enumerate(self.parent_pipes):
            result, success = pipe.recv()
            successes.append(success)
            if not success:
                # Presumably a failed worker does not send a 4-tuple (verify
                # against the worker code). Unpacking it here would raise an
                # unrelated error before `_raise_if_errors` can report the
                # actual failure, so the payload is skipped.
                continue

            obs, rew, done, info = result
            observations_list.append(obs)
            rewards.append(rew)
            dones.append(done)
            infos = self._add_info(infos, info, i)

        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        if not self.shared_memory:
            self.observations = concatenate(
                self.single_observation_space,
                observations_list,
                self.observations,
            )

        return (
            deepcopy(self.observations) if self.copy else self.observations,
            np.array(rewards),
            np.array(dones, dtype=np.bool_),
            infos,
        )
    def step_wait(self, timeout=None):
        """Collect the transitions produced by a pending `step_async` call.

        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `step_wait` times out. If
            `None`, the call to `step_wait` never times out.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.

        rewards : `np.ndarray` instance (dtype `np.float_`)
            A vector of rewards from the vectorized environment.

        dones : `np.ndarray` instance (dtype `np.bool_`)
            A vector whose entries indicate whether the episode has ended.

        infos : list of dict
            A list of auxiliary diagnostic informations.

        Raises
        ------
        NoAsyncCallError
            If the state is not `AsyncState.WAITING_STEP`.
        mp.TimeoutError
            If `self._poll(timeout)` reports that the results are not ready.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_STEP:
            raise NoAsyncCallError(
                'Calling `step_wait` without any prior call '
                'to `step_async`.',
                AsyncState.WAITING_STEP.value)

        ready = self._poll(timeout)
        if not ready:
            # Leave the waiting state before reporting the timeout.
            self._state = AsyncState.DEFAULT
            plural = 's' if timeout > 1 else ''
            message = ('The call to `step_wait` has timed out after '
                       '{0} second{1}.').format(timeout, plural)
            raise mp.TimeoutError(message)

        # Each pipe yields a (result, success) pair.
        replies = [pipe.recv() for pipe in self.parent_pipes]
        results, successes = zip(*replies)
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT
        observations_list, rewards, dones, infos = zip(*results)

        if not self.shared_memory:
            concatenate(observations_list, self.observations,
                        self.single_observation_space)

        batch = deepcopy(self.observations) if self.copy else self.observations
        reward_array = np.array(rewards)
        done_array = np.array(dones, dtype=np.bool_)
        return (batch, reward_array, done_array, infos)
    async def async_step(self, actions):
        """Step all sub-environments concurrently and batch the results.

        Every step is wrapped in a one second timeout. A step that ends in an
        exception is reported on stdout and replaced by a reset of that
        sub-environment, with reward 0.0, done False and an info dict that
        marks the transition as bad.

        Returns a tuple ``(observations, rewards, dones, infos)``; rewards and
        dones are copies of the internal arrays.
        """
        observations, infos = [], []
        # `return_exceptions=True` delivers failures (including timeouts) as
        # result values instead of raising out of `gather`.
        p = await (asyncio.gather(
            *[
                asyncio.wait_for(call_wrapped_async_step(env, action),
                                 timeout=1.0)
                for env, action in zip(self.envs, actions)
            ],
            return_exceptions=True,
        ))
        for i, result in enumerate(p):
            env = self.envs[i]
            if isinstance(result, Exception):
                print("Error:", type(result), result, self.envs[i].task)
                # traceback.print_exception(result)
                # The reset is not awaited here: its result is stored as a
                # placeholder in `observations` and resolved in bulk below.
                observation, self._rewards[i], self._dones[i], info = [
                    call_wrapped_async_reset(env),
                    0.0,
                    False,
                    {
                        "bad_transition": True
                    },
                ]
            else:
                observation, self._rewards[i], self._dones[i], info = result
            observations.append(observation)
            infos.append(info)
        # Await all placeholder coroutines together.
        p = await (asyncio.gather(
            *[o for o in observations if asyncio.iscoroutine(o)]))
        # Put each awaited observation back into the slot of its coroutine;
        # `gather` preserves order, so a running counter is enough.
        j = 0
        for i, o in enumerate(observations):
            if asyncio.iscoroutine(o):
                observations[i] = p[j]
                j += 1

        concatenate(observations, self.observations,
                    self.single_observation_space)

        return (
            deepcopy(self.observations) if self.copy else self.observations,
            np.copy(self._rewards),
            np.copy(self._dones),
            infos,
        )
Esempio n. 13
0
 def concat_obs(self, observations):
     """Flatten batched observations into single items and re-batch them.

     Every entry of ``observations`` is split with ``iterate`` over the
     observation space; the items are then concatenated into a freshly
     created array sized for ``self.num_envs``.
     """
     flattened = []
     for obs in observations:
         flattened.extend(iterate(self.observation_space, obs))

     buffer = create_empty_array(self.observation_space, n=self.num_envs)
     return concatenate(self.observation_space, flattened, buffer)
Esempio n. 14
0
def _concatenate_typed_dicts(
    space: TypedDictSpace,
    items: Union[list, tuple],
    out: Union[tuple, dict, np.ndarray],
) -> Dict:
    """Concatenate dict-like items key by key and wrap them in ``space.dtype``.

    For each ``(key, subspace)`` of ``space.spaces`` the values found under
    that key in ``items`` are concatenated into ``out[key]``.
    """
    merged = {}
    for key, subspace in space.spaces.items():
        column = [item[key] for item in items]
        merged[key] = concatenate(subspace, column, out=out[key])
    return space.dtype(**merged)
Esempio n. 15
0
    def reset_wait(
        self,
        seed: Optional[Union[int, list[int]]] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """Reset every sub-environment and return the batched observations.

        Args:
            seed: The reset environment seed. ``None`` passes no seed to any
                sub-environment; an int ``s`` seeds sub-environment ``i`` with
                ``s + i``; a list supplies one seed per sub-environment.
            return_info: If to return information
            options: Option information for the environment reset

        Returns:
            The batched reset observations, or a tuple of those observations
            and the list of per-environment reset infos when ``return_info``
            is truthy.

        Raises:
            AssertionError: If the number of seeds differs from ``self.num_envs``.
        """
        if seed is None:
            seed = [None for _ in range(self.num_envs)]
        if isinstance(seed, int):
            seed = [seed + i for i in range(self.num_envs)]
        assert len(seed) == self.num_envs

        # Decide once whether infos are wanted, so that the keyword sent to
        # `env.reset` and the shape of the value unpacked from it always agree
        # (previously `is True` gated the keyword, truthiness the unpacking).
        want_info = bool(return_info)

        self._dones[:] = False
        observations = []
        data_list = []
        for env, single_seed in zip(self.envs, seed):
            # Only forward the keywords that were actually supplied.
            kwargs = {}
            if single_seed is not None:
                kwargs["seed"] = single_seed
            if options is not None:
                kwargs["options"] = options

            if want_info:
                kwargs["return_info"] = True
                observation, data = env.reset(**kwargs)
                data_list.append(data)
            else:
                observation = env.reset(**kwargs)
            observations.append(observation)

        self.observations = concatenate(self.single_observation_space,
                                        observations, self.observations)

        batch = deepcopy(self.observations) if self.copy else self.observations
        if want_info:
            return batch, data_list
        return batch
Esempio n. 16
0
    def concat_obs(self, obs_dict):
        """Batch the per-agent observations in ``possible_agents`` order.

        Raises an AssertionError when any possible agent has no entry in
        ``obs_dict``.
        """
        obs_list = []
        for agent in self.par_env.possible_agents:
            if agent in obs_dict:
                obs_list.append(obs_dict[agent])
                continue
            raise AssertionError(
                "environment has agent death. Not allowed for pettingzoo_env_to_vec_env_v1 unless black_death is True"
            )

        buffer = create_empty_array(self.observation_space, self.num_envs)
        return concatenate(self.observation_space, obs_list, buffer)
Esempio n. 17
0
    def reset_wait(self, timeout=None):
        """Collect the observations produced by a pending :meth:`reset_async` call.

        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to :meth:`reset_wait` times out.
            If ``None``, the call to :meth:`reset_wait` never times out.

        Returns
        -------
        element of :attr:`~VectorEnv.observation_space`
            A batch of observations from the vectorized environment.

        Raises
        ------
        NoAsyncCallError
            If :meth:`reset_wait` was called without any prior call to
            :meth:`reset_async`.

        TimeoutError
            If :meth:`reset_wait` timed out.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_RESET:
            raise NoAsyncCallError(
                "Calling `reset_wait` without any prior call to `reset_async`.",
                AsyncState.WAITING_RESET.value,
            )

        ready = self._poll(timeout)
        if not ready:
            # Leave the waiting state before reporting the timeout.
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                f"The call to `reset_wait` has timed out after {timeout} second{'s' if timeout > 1 else ''}."
            )

        # Each pipe yields a (result, success) pair.
        replies = [pipe.recv() for pipe in self.parent_pipes]
        results, successes = zip(*replies)
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        if not self.shared_memory:
            self.observations = concatenate(
                results, self.observations, self.single_observation_space
            )

        if self.copy:
            return deepcopy(self.observations)
        return self.observations
Esempio n. 18
0
def fuse_and_batch(item_space: spaces.Space, *sequences: Sequence[Sequence[T]],
                   n_items: int) -> Sequence[T]:
    """Fuse several lists of (batches of) items into one batch of `n_items`.

    Every element of every sequence is reshaped to ``(-1, *item_space.shape)``
    and the pieces are stacked along the first axis. The stack is then split
    into `n_items` equal parts, each reshaped to ``item_space.shape``, and
    concatenated into a freshly created array.
    """
    for sequence in sequences:
        assert isinstance(sequence, list)
    out = create_empty_array(item_space, n=n_items)

    # Stack everything into one array with a leading sample axis.
    batched_shape = [-1, *item_space.shape]
    pieces = []
    for v in itertools.chain(*sequences):
        pieces.append(np.asarray(v).reshape(batched_shape))
    items_batch = np.concatenate(pieces)

    # Cut the stack back into individual items.
    items = []
    for part in np.split(items_batch, n_items):
        items.append(part.reshape(item_space.shape))

    # TODO: Need to add more tests to make sure this works with custom spaces and Dict spaces.
    return concatenate(items, out, item_space)
Esempio n. 19
0
    def step_wait(self):
        """Apply the stored actions to the sub-environments and batch the results.

        When an info dict flags ``"TimeLimit.truncated"``, the observation of
        that step is stored in it under ``"TimeLimit.next_obs"``. A
        sub-environment whose step reports done is reset immediately, and the
        observation from that reset replaces the one from the step.

        Returns a tuple ``(observations, rewards, dones, infos)``; rewards and
        dones are copies of the internal arrays.
        """
        collected_obs = []
        collected_infos = []
        for index, (env, action) in enumerate(zip(self.envs, self._actions)):
            transition = env.step(action)
            obs, self._rewards[index], self._dones[index], info = transition
            truncated = info.get("TimeLimit.truncated", False)
            if truncated:
                # Keep the pre-reset observation available to the caller.
                info["TimeLimit.next_obs"] = obs
            if self._dones[index]:
                obs = env.reset()
            collected_obs.append(obs)
            collected_infos.append(info)

        self.observations = concatenate(collected_obs, self.observations,
                                        self.single_observation_space)

        batch = deepcopy(self.observations) if self.copy else self.observations
        rewards = np.copy(self._rewards)
        dones = np.copy(self._dones)
        return (batch, rewards, dones, collected_infos)
Esempio n. 20
0
    def reset_wait(
        self,
        seed: Optional[Union[int, List[int]]] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """Reset every sub-environment and return the batched observations.

        Args:
            seed: ``None`` passes no seed to any sub-environment; an int ``s``
                seeds sub-environment ``i`` with ``s + i``; a list supplies
                one seed per sub-environment.
            return_info: Whether the per-environment reset infos are returned
                alongside the observations.
            options: Forwarded to every ``env.reset`` call when not ``None``.

        Returns:
            The batched reset observations, or a tuple of those observations
            and the list of per-environment reset infos when ``return_info``
            is truthy.

        Raises:
            AssertionError: If the number of seeds differs from ``self.num_envs``.
        """
        if seed is None:
            seed = [None for _ in range(self.num_envs)]
        if isinstance(seed, int):
            seed = [seed + i for i in range(self.num_envs)]
        assert len(seed) == self.num_envs

        # Decide once whether infos are wanted, so that the keyword sent to
        # `env.reset` and the shape of the value unpacked from it always agree
        # (previously `== True` gated the keyword, truthiness the unpacking).
        want_info = bool(return_info)

        self._dones[:] = False
        observations = []
        data_list = []
        for env, single_seed in zip(self.envs, seed):
            # Only forward the keywords that were actually supplied.
            kwargs = {}
            if single_seed is not None:
                kwargs["seed"] = single_seed
            if options is not None:
                kwargs["options"] = options

            if want_info:
                kwargs["return_info"] = True
                observation, data = env.reset(**kwargs)
                data_list.append(data)
            else:
                observation = env.reset(**kwargs)
            observations.append(observation)

        self.observations = concatenate(self.single_observation_space,
                                        observations, self.observations)

        batch = deepcopy(self.observations) if self.copy else self.observations
        if want_info:
            return batch, data_list
        return batch
def dict_vec_env_test(env):
    """Check that `env` behaves like a vectorized dict-observation environment.

    Runs 55 steps with randomly sampled actions and asserts on every step
    that the observation has the expected structure and that the
    sub-environments only ever finish all at once.
    """
    obss = env.reset()
    for _ in range(55):
        samples = [env.action_space.sample() for _ in range(env.num_envs)]
        empty = create_empty_array(env.action_space, env.num_envs)
        actions = concatenate(env.action_space, samples, empty)

        obss, rews, dones, infos = env.step(actions)
        assert obss["feature"][1][0] == 1

        # Rebuild the observation at index 1 and check it against the space.
        single_obs = {
            "feature": obss["feature"][1][:],
            "id": [o[1] for o in obss["id"]],
        }
        assert single_obs in env.observation_space

        # no agent death, only env death
        if any(dones):
            assert all(dones)
Esempio n. 22
0
    def step_wait(self):
        """Steps through each of the environments returning the batched results.

        A sub-environment whose step reports done has its last observation
        stored in the info dict under ``"terminal_observation"`` and is then
        reset; the observation from that reset is the one that gets batched.

        Returns:
            The batched environment step results
        """
        collected_obs = []
        collected_infos = []
        for index, (env, action) in enumerate(zip(self.envs, self._actions)):
            transition = env.step(action)
            obs, self._rewards[index], self._dones[index], info = transition
            if self._dones[index]:
                info["terminal_observation"] = obs
                obs = env.reset()
            collected_obs.append(obs)
            collected_infos.append(info)

        self.observations = concatenate(self.single_observation_space,
                                        collected_obs, self.observations)

        batch = deepcopy(self.observations) if self.copy else self.observations
        rewards = np.copy(self._rewards)
        dones = np.copy(self._dones)
        return (batch, rewards, dones, collected_infos)
Esempio n. 23
0
 def reset_wait(self, **kwargs) -> Any:
     """Reset the wrapped environment and return the batched observation.

     Extra keyword arguments are accepted but not forwarded to the reset.
     """
     first_obs = self.env.reset()
     batch = concatenate([first_obs], self._observation, self.single_observation_space)
     self._observation = batch
     return self._observation
Esempio n. 24
0
    def step_wait(self, timeout=None):
        """Collect the transitions produced by a pending :meth:`step_async` call.

        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to :meth:`step_wait` times out. If
            ``None``, the call to :meth:`step_wait` never times out.

        Returns
        -------
        observations : element of :attr:`~VectorEnv.observation_space`
            A batch of observations from the vectorized environment.

        rewards : :obj:`np.ndarray`, dtype :obj:`np.float_`
            A vector of rewards from the vectorized environment.

        dones : :obj:`np.ndarray`, dtype :obj:`np.bool_`
            A vector whose entries indicate whether the episode has ended.

        infos : list of dict
            A list of auxiliary diagnostic information dicts from sub-environments.

        Raises
        ------
        NoAsyncCallError
            If :meth:`step_wait` was called without any prior call to
            :meth:`step_async`.

        TimeoutError
            If :meth:`step_wait` timed out.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_STEP:
            raise NoAsyncCallError(
                "Calling `step_wait` without any prior call to `step_async`.",
                AsyncState.WAITING_STEP.value,
            )

        timed_out = not self._poll(timeout)
        if timed_out:
            # Leave the waiting state before reporting the timeout.
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                f"The call to `step_wait` has timed out after {timeout} second(s)."
            )

        # Each pipe yields a (result, success) pair.
        replies = [pipe.recv() for pipe in self.parent_pipes]
        results, successes = zip(*replies)
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT
        observations_list, rewards, dones, infos = zip(*results)

        if not self.shared_memory:
            self.observations = concatenate(
                self.single_observation_space,
                observations_list,
                self.observations,
            )

        batch = deepcopy(self.observations) if self.copy else self.observations
        reward_array = np.array(rewards)
        done_array = np.array(dones, dtype=np.bool_)
        return (batch, reward_array, done_array, infos)
Esempio n. 25
0
    def reset_wait(
        self,
        timeout=None,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """Collect the observations produced by a pending :meth:`reset_async` call.

        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `reset_wait` times out. If
            `None`, the call to `reset_wait` never times out.
        seed: ignored
        return_info : bool
            Whether the list of reset infos is returned as well.
        options: ignored

        Returns
        -------
        element of :attr:`~VectorEnv.observation_space`
            A batch of observations from the vectorized environment.
        infos : list of dicts containing metadata
            Only returned (as the second tuple element) when `return_info`
            is set.

        Raises
        ------
        NoAsyncCallError
            If :meth:`reset_wait` was called without any prior call to
            :meth:`reset_async`.

        TimeoutError
            If :meth:`reset_wait` timed out.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_RESET:
            raise NoAsyncCallError(
                "Calling `reset_wait` without any prior call to `reset_async`.",
                AsyncState.WAITING_RESET.value,
            )

        timed_out = not self._poll(timeout)
        if timed_out:
            # Leave the waiting state before reporting the timeout.
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError(
                f"The call to `reset_wait` has timed out after {timeout} second(s)."
            )

        # Each pipe yields a (result, success) pair.
        replies = [pipe.recv() for pipe in self.parent_pipes]
        results, successes = zip(*replies)
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        infos = None
        if return_info:
            # With infos requested every result is an (observation, info) pair.
            results, info_tuple = zip(*results)
            infos = list(info_tuple)

        if not self.shared_memory:
            self.observations = concatenate(
                self.single_observation_space, results, self.observations
            )

        batch = deepcopy(self.observations) if self.copy else self.observations
        if return_info:
            return batch, infos
        return batch
Esempio n. 26
0
def async_loop(vec_env_constr, inpt_p, pipe, shared_obs, shared_rews,
               shared_dones):
    """Serve instructions received over `pipe` against a vectorized environment.

    After closing `inpt_p`, the environment is built with `vec_env_constr`,
    its `num_envs` is sent over `pipe`, and the start index of its slice in
    the shared arrays is received back. Each loop iteration then receives one
    instruction and answers with one message:

    * ``"close"`` closes the environment (the loop keeps running).
    * ``"terminate"`` returns without sending a reply.
    * ``("reset", (seed, return_info, options))`` resets the environment,
      writes the observations, and clears its slice of the shared dones and
      rewards.
    * ``("step", actions)`` steps the environment and writes observations,
      dones and rewards.
    * ``("env_is_wrapped", data)`` and ``("render", data)`` forward to the
      environment; a render result is only sent back for ``"rgb_array"``.

    Any exception, including an unknown instruction, is sent over `pipe` as
    an ``(exception, formatted_traceback)`` tuple and ends the loop.
    """
    inpt_p.close()
    try:
        vec_env = vec_env_constr()

        pipe.send((vec_env.num_envs))
        env_start_idx = pipe.recv()
        env_end_idx = env_start_idx + vec_env.num_envs
        while True:
            instr = pipe.recv()
            # Default reply for instructions that produce no payload.
            comp_infos = []

            if instr == "close":
                vec_env.close()

            elif isinstance(instr, tuple):
                name, data = instr

                if name == "reset":
                    if not data[1]:
                        observations = vec_env.reset(seed=data[0],
                                                     options=data[2])
                    else:
                        observations, infos = vec_env.reset(
                            seed=data[0], return_info=data[1], options=data[2])
                        comp_infos = compress_info(infos)

                    write_observations(vec_env, env_start_idx, shared_obs,
                                       observations)
                    shared_dones.np_arr[env_start_idx:env_end_idx] = False
                    shared_rews.np_arr[env_start_idx:env_end_idx] = 0.0

                elif name == "step":
                    actions = data
                    actions = concatenate(
                        vec_env.action_space,
                        actions,
                        create_empty_array(vec_env.action_space,
                                           n=len(actions)),
                    )
                    observations, rewards, dones, infos = vec_env.step(actions)
                    write_observations(vec_env, env_start_idx, shared_obs,
                                       observations)
                    shared_dones.np_arr[env_start_idx:env_end_idx] = dones
                    shared_rews.np_arr[env_start_idx:env_end_idx] = rewards
                    comp_infos = compress_info(infos)

                elif name == "env_is_wrapped":
                    comp_infos = vec_env.env_is_wrapped(data)

                elif name == "render":
                    render_result = vec_env.render(data)
                    if data == "rgb_array":
                        comp_infos = render_result

                else:
                    # Formatting instead of `+` keeps the intended error even
                    # when `name` is not a string.
                    raise AssertionError(
                        f"bad tuple instruction name: {name}")
            elif instr == "terminate":
                return
            else:
                # Same reasoning: `instr` may be any picklable object here.
                raise AssertionError(f"bad instruction: {instr}")
            pipe.send(comp_infos)
    except BaseException as e:
        # Report every failure to the other end of the pipe instead of
        # letting it propagate out of this function.
        tb = traceback.format_exc()
        pipe.send((e, tb))
Esempio n. 27
0
 def concatenate_actions(self, actions, n_actions):
     """Batch `actions` into a new array sized for `n_actions` entries."""
     space = self.action_space
     target = create_empty_array(self.action_space, n=n_actions)
     return concatenate(space, actions, target)