Example #1
    def _step_wait(self) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray,
                                  Iterable[Dict[Any, Any]]]:
        """
        Wait for the step taken with step_async().

        :return: stacked observations, rewards, dones, and the per-env info dicts.
        """
        results = [remote.recv() for remote in self.remotes]
        self.waiting = False
        obs, rews, env_dones, infos, actor_dones, actor_ids, episode_stats, env_times = \
            zip(*results)

        self._env_times = np.stack(env_times)
        self._actor_dones = np.stack(actor_dones)
        self._actor_ids = actor_ids

        # collect episode statistics
        for stat in episode_stats:
            if stat is not None:
                self.epoch_stats.receive(stat)

        return (stack_numpy_dict_list(obs), np.stack(rews),
                np.stack(env_dones), infos)
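All of these examples pivot on stack_numpy_dict_list. A minimal sketch of its assumed behavior (not the library's actual implementation): take a list of dicts of numpy arrays with matching keys and return one dict whose arrays gain a leading batch dimension.

from typing import Dict, List, Optional

import numpy as np


def stack_numpy_dict_list_sketch(
        dict_list: List[Dict[str, np.ndarray]]) -> Optional[Dict[str, np.ndarray]]:
    """Stack a list of dicts of numpy arrays into one dict of batched arrays."""
    if not dict_list:
        return None
    return {k: np.stack([d[k] for d in dict_list]) for k in dict_list[0]}


# Two per-env observations, as the remotes above would return them:
obs_a = {"position": np.zeros(3), "velocity": np.ones(2)}
obs_b = {"position": np.ones(3), "velocity": np.zeros(2)}

batched = stack_numpy_dict_list_sketch([obs_a, obs_b])
assert batched["position"].shape == (2, 3)  # leading env dimension
assert batched["velocity"].shape == (2, 2)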
Example #2
    def stack(cls, records: List['SpacesRecord']) -> 'SpacesRecord':
        """Stack multiple records into a single spaces record. Useful for processing multiple records in a batch.

        All the records should be in numpy and have the same structure of the spaces (i.e. come from the same
        environment etc.).

        :param records: Records to stack.
        :return: Single stacked record, containing all the given records, and having the corresponding batch shape.
        """

        assert len({r.substep_key for r in records}) == 1, \
            "Cannot batch records for different sub-step keys."
        assert len({r.agent_id for r in records}) == 1, \
            "Cannot batch records for different agent ids."

        stacked_record = SpacesRecord(
            actor_id=records[0].actor_id,
            observation=stack_numpy_dict_list([r.observation for r in records]),
            action=stack_numpy_dict_list([r.action for r in records]),
            reward=np.stack([r.reward for r in records]),
            done=np.stack([r.done for r in records]))

        if records[0].next_observation:
            stacked_record.next_observation = stack_numpy_dict_list(
                [r.next_observation for r in records])

        if records[0].logits:
            stacked_record.logits = stack_torch_dict_list(
                [r.logits for r in records])

        stacked_record.batch_shape = [len(records)]
        if records[0].batch_shape:
            stacked_record.batch_shape += records[0].batch_shape

        return stacked_record
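The batch_shape bookkeeping at the end is worth unpacking: stacking N unbatched records yields batch shape [N], while stacking N records that each already carry batch shape [B] yields [N, B]. A plain-numpy illustration of the shapes involved (the reward values are made up):

import numpy as np

# Rewards from three unbatched records: scalars stack to shape (3,),
# i.e. batch_shape == [3].
unbatched = [np.float64(1.0), np.float64(0.5), np.float64(-1.0)]
assert np.stack(unbatched).shape == (3,)

# Rewards from three records that already have batch_shape == [4]:
# stacking adds a new leading axis, giving batch_shape == [3, 4].
batched = [np.zeros(4), np.ones(4), np.full(4, 2.0)]
assert np.stack(batched).shape == (3, 4)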
Example #3
    def reset(self) -> Dict[str, np.ndarray]:
        """VectorEnv implementation"""
        observations = []
        for env in self.envs:
            observations.append(env.reset())
            # forward the episode statistics collected by the environment before the reset()
            self.epoch_stats.receive(
                env.get_stats(LogStatsLevel.EPISODE).last_stats)

        self._env_times = np.array([env.get_env_time() for env in self.envs])
        self._actor_ids = [env.actor_id() for env in self.envs]
        self._actor_dones = np.hstack(
            [env.is_actor_done() for env in self.envs])

        return stack_numpy_dict_list(observations)
Example #4
    def compute_action(self,
                       observation: ObservationType,
                       maze_state: Optional[MazeStateType],
                       env: Optional[BaseEnv] = None,
                       actor_id: Optional[ActorID] = None,
                       deterministic: bool = False) -> ActionType:
        """Sample multiple actions together."""
        if actor_id:
            action_space = self.action_spaces_dict[actor_id.step_key]
        else:
            assert len(self.action_spaces_dict) == 1, "action spaces for multiple sub-steps are available, please " \
                                                      "specify actor ID explicitly"
            action_space = list(self.action_spaces_dict.values())[0]

        return stack_numpy_dict_list(
            [action_space.sample() for _ in range(self.concurrency)])
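A usage sketch of the sampling pattern above, assuming the action spaces follow the gym space interface (the concrete space below is hypothetical). The stacking is inlined to show the resulting batch shapes:

import gym
import numpy as np

# Hypothetical dict action space:
action_space = gym.spaces.Dict({
    "torque": gym.spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32),
    "gear": gym.spaces.Discrete(3),
})

concurrency = 4
samples = [action_space.sample() for _ in range(concurrency)]  # list of dicts

# Equivalent of stack_numpy_dict_list on the sampled actions (assumed
# behavior: batch each key along a new leading axis):
batched = {k: np.stack([s[k] for s in samples]) for k in samples[0]}
assert batched["torque"].shape == (concurrency, 2)
assert batched["gear"].shape == (concurrency,)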
Example #5
    def reset(self) -> Dict[str, np.ndarray]:
        """VectorEnv implementation"""
        for remote in self.remotes:
            remote.send(('reset', None))
        results = [remote.recv() for remote in self.remotes]
        obs, actor_dones, actor_ids, episode_stats, env_times = zip(*results)

        self._env_times = np.stack(env_times)
        self._actor_dones = np.stack(actor_dones)
        self._actor_ids = actor_ids

        # collect episode statistics
        for stat in episode_stats:
            if stat is not None:
                self.epoch_stats.receive(stat)

        return stack_numpy_dict_list(obs)
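The zip(*results) line is the transpose idiom all of these vectorized methods share: a list of per-worker reply tuples becomes one tuple per field. A toy illustration with two fake worker replies (the tuple layout follows the unpacking above; the payload values are made up):

import numpy as np

# Two fake worker replies: (obs, actor_done, actor_id, episode_stats, env_time)
results = [
    ({"obs": np.zeros(2)}, False, ("step_key", 0), None, 0.0),
    ({"obs": np.ones(2)}, True, ("step_key", 0), None, 1.0),
]
obs, actor_dones, actor_ids, episode_stats, env_times = zip(*results)

assert np.stack(actor_dones).shape == (2,)
assert np.stack(env_times).shape == (2,)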
Example #6
    def step(
        self, actions: ActionType
    ) -> Tuple[ObservationType, np.ndarray, np.ndarray, Iterable[Dict[Any, Any]]]:
        """Step the environments with the given actions.

        :param actions: the list of actions for the respective envs.
        :return: observations, rewards, dones, information-dicts all in env-aggregated form.
        """
        actions = unstack_numpy_list_dict(actions)
        observations, rewards, env_dones, infos, actor_dones, actor_ids = [], [], [], [], [], []

        for i, env in enumerate(self.envs):
            # avoid shadowing the loop index: unpack the info dict as `info`
            o, r, env_done, info = env.step(actions[i])
            actor_dones.append(env.is_actor_done())
            actor_ids.append(env.actor_id())

            if env_done:
                o = env.reset()
                # collect the episode statistics for finished environments
                self.epoch_stats.receive(
                    env.get_stats(LogStatsLevel.EPISODE).last_stats)

            observations.append(o)
            rewards.append(r)
            env_dones.append(env_done)
            infos.append(info)

        obs = stack_numpy_dict_list(observations)
        rewards = np.hstack(rewards).astype(np.float32)
        env_dones = np.hstack(env_dones)

        self._env_times = np.array([env.get_env_time() for env in self.envs])
        self._actor_dones = np.hstack(actor_dones)
        self._actor_ids = actor_ids

        return obs, rewards, env_dones, infos
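unstack_numpy_list_dict is the inverse of the stacking helper: it splits one dict of batched arrays back into a per-env list of dicts before dispatching the step. A minimal sketch of the assumed behavior (not the library implementation):

from typing import Dict, List

import numpy as np


def unstack_numpy_list_dict_sketch(
        batched: Dict[str, np.ndarray]) -> List[Dict[str, np.ndarray]]:
    """Split a dict of batched arrays into one dict per leading-axis index."""
    n = len(next(iter(batched.values())))
    return [{k: v[i] for k, v in batched.items()} for i in range(n)]


actions = {"torque": np.zeros((3, 2)), "gear": np.array([0, 1, 2])}
per_env = unstack_numpy_list_dict_sketch(actions)
assert len(per_env) == 3 and per_env[2]["gear"] == 2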
Example #7
def test_torch_conversion():
    stacked = convert_to_torch(stack_numpy_dict_list(unstacked_example()), in_place=True, cast=None, device="cpu")
    for k, v in convert_to_torch(stacked_example(), in_place=True, cast=None, device="cpu").items():
        assert torch.all(v == stacked[k])
Example #8
def test_numpy_dict_list_stacking():
    stacked = stack_numpy_dict_list(unstacked_example())
    for k, v in stacked_example().items():
        assert np.all(v == stacked[k])
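Both tests rely on unstacked_example() and stacked_example() fixtures that are not shown here. Hypothetical stand-ins that satisfy the round-trip property being checked (the actual fixtures in the test module may differ):

from typing import Dict, List

import numpy as np


def unstacked_example() -> List[Dict[str, np.ndarray]]:
    # A list of per-step dicts, as individual envs would produce them.
    return [{"obs": np.array([0.0, 1.0])}, {"obs": np.array([2.0, 3.0])}]


def stacked_example() -> Dict[str, np.ndarray]:
    # The same data after stacking: one array with a leading batch axis.
    return {"obs": np.array([[0.0, 1.0], [2.0, 3.0]])}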