Example 1
import gym
import torch

# `make_batched_env`, `EnvDataset`, `GymDataLoader` and `take` are assumed to be
# provided by the surrounding test module / package.
def test_batched_state(env_name: str, batch_size: int):
    max_steps_per_episode = 10

    env = make_batched_env(env_name, batch_size=batch_size)
    dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

    env: GymDataLoader = GymDataLoader(
        dataset,
        batch_size=batch_size,
    )
    with gym.make(env_name) as temp_env:
        state_shape = temp_env.observation_space.shape
        action_shape = temp_env.action_space.shape

    state_shape = (batch_size, *state_shape)
    action_shape = (batch_size, *action_shape)
    reward_shape = (batch_size, )

    state = env.reset()
    assert state.shape == state_shape
    env.seed(123)
    i = 0
    for obs_batch in take(env, 5):
        assert obs_batch.shape == state_shape

        random_actions = env.action_space.sample()
        assert torch.as_tensor(random_actions).shape == action_shape
        assert temp_env.action_space.contains(random_actions[0])

        reward = env.send(random_actions)
        assert reward.shape == reward_shape
        i += 1
    assert i == 5
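For reference, the batched reset/step pattern exercised above can also be reproduced with plain gym vector environments. The sketch below is an illustration only (not part of the original test): it assumes nothing beyond gym itself and uses SyncVectorEnv as a stand-in for whatever make_batched_env builds.

import gym
from gym.vector import SyncVectorEnv

def demo_batched_reset_step(env_name: str = "CartPole-v0", batch_size: int = 4):
    # One sub-environment per batch element; observations and rewards gain a
    # leading batch dimension of size `batch_size`.
    env = SyncVectorEnv([lambda: gym.make(env_name) for _ in range(batch_size)])
    obs = env.reset()
    assert obs.shape == (batch_size, *env.single_observation_space.shape)

    actions = env.action_space.sample()  # samples one action per sub-environment
    obs, rewards, dones, infos = env.step(actions)
    assert rewards.shape == (batch_size,)
    env.close()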
Example 2
from gym.vector.utils import batch_space
from torch import Tensor

# As above, `make_batched_env`, `EnvDataset`, `GymDataLoader` and `take` are
# assumed to come from the surrounding test module.
def test_spaces(env_name: str, batch_size: int):
    dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size))

    batched_obs_space = dataset.observation_space
    # NOTE: The VectorEnv class creates the 'batched' action space as a Tuple of
    # 'N' copies of the single action space, which seems a bit odd.
    # batched_action_space = vector_env.action_space
    batched_action_space = batch_space(dataset.single_action_space, batch_size)

    dataloader_env = GymDataLoader(dataset, batch_size=batch_size)
    assert dataloader_env.observation_space == batched_obs_space
    assert dataloader_env.action_space == batched_action_space

    dataloader_env.reset()
    for observation_batch in take(dataloader_env, 3):
        if isinstance(observation_batch, Tensor):
            observation_batch = observation_batch.cpu().numpy()
        assert observation_batch in batched_obs_space

        actions = dataloader_env.action_space.sample()
        assert len(actions) == batch_size
        assert actions in batched_action_space

        rewards = dataloader_env.send(actions)
        assert len(rewards) == batch_size
        assert rewards in dataloader_env.reward_space
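The NOTE above contrasts VectorEnv's Tuple-style action space with gym's batch_space helper, which this test uses instead. The snippet below is a small illustration (not from the original test) of what batch_space produces for two standard gym spaces; exact string representations vary with the gym version.

from gym.spaces import Box, Discrete
from gym.vector.utils import batch_space

single_obs_space = Box(low=-1.0, high=1.0, shape=(4,))
single_action_space = Discrete(2)

# A Box simply gains a leading batch dimension: shape (4,) -> shape (3, 4).
print(batch_space(single_obs_space, n=3))
# A Discrete(2) becomes a MultiDiscrete of length n: MultiDiscrete([2 2 2]).
print(batch_space(single_action_space, n=3))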
Example 3
    def _check_environments(self):
        """ Do a quick check to make sure that interacting with the envs/dataloaders
        works correctly.
        """
        # Check that the env's spaces are batched versions of the settings'.
        from gym.vector.utils import batch_space
        from sequoia.settings.passive import PassiveEnvironment

        batch_size = self.batch_size
        for loader_method in [
            self.train_dataloader,
            self.val_dataloader,
            self.test_dataloader,
        ]:
            print(f"\n\nChecking loader method {loader_method.__name__}\n\n")
            env = loader_method(batch_size=batch_size)

            batch_size = env.batch_size

            # We could compare the spaces directly, but that's a bit messy and
            # would depend on the type of each space. Instead, we check samples
            # from those spaces to see how the spaces are batched.
            if batch_size:
                expected_observation_space = batch_space(
                    self.observation_space, n=batch_size
                )
                expected_action_space = batch_space(self.action_space, n=batch_size)
                expected_reward_space = batch_space(self.reward_space, n=batch_size)
            else:
                expected_observation_space = self.observation_space
                expected_action_space = self.action_space
                expected_reward_space = self.reward_space

            # TODO: Batching the 'Sparse' space makes it really ugly, so we just
            # compare the 'image' portion of the space for now.
            assert (
                env.observation_space[0].shape == expected_observation_space[0].shape
            ), (env.observation_space[0], expected_observation_space[0])

            assert env.action_space == expected_action_space, (
                env.action_space,
                expected_action_space,
            )
            assert env.reward_space == expected_reward_space, (
                env.reward_space,
                expected_reward_space,
            )

            # Check that the 'gym API' interaction is working correctly.
            reset_obs: Observations = env.reset()
            self._check_observations(env, reset_obs)

            for i in range(5):
                actions = env.action_space.sample()
                self._check_actions(env, actions)
                step_observations, step_rewards, done, info = env.step(actions)
                self._check_observations(env, step_observations)
                self._check_rewards(env, step_rewards)
                if batch_size:
                    assert not any(done)
                else:
                    assert not done
                # assert not (done if isinstance(done, bool) else any(done))

            for batch in take(env, 5):
                observations: Observations
                rewards: Optional[Rewards]

                if isinstance(env, PassiveEnvironment):
                    observations, rewards = batch
                else:
                    # In RL, at the moment, the 'dataset' yields only the
                    # observations.
                    observations, rewards = batch, None

                self._check_observations(env, observations)
                if rewards is not None:
                    self._check_rewards(env, rewards)

                if batch_size:
                    actions = tuple(
                        self.action_space.sample() for _ in range(batch_size)
                    )
                else:
                    actions = self.action_space.sample()
                # actions = self.Actions(torch.as_tensor(actions))
                rewards = env.send(actions)
                self._check_rewards(env, rewards)

            env.close()
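The _check_observations, _check_actions and _check_rewards helpers are not included in this excerpt. Below is a purely hypothetical sketch of what such a helper could look like, assuming only that samples should fall inside the environment's (possibly batched) spaces; the real helpers in the codebase may differ substantially.

    def _check_observations(self, env, observations):
        # Hypothetical sketch, not the actual implementation: move tensors to
        # numpy so that `Space.contains` applies, then check membership in the
        # (batched) observation space.
        obs = observations
        if hasattr(obs, "cpu"):
            obs = obs.cpu().numpy()
        assert obs in env.observation_space, (obs, env.observation_space)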