Example #1
from functools import partial
from typing import Tuple

import gym
import pytest
from gym import Env, Wrapper, spaces
from gym.envs.classic_control import CartPoleEnv
from sequoia.conftest import param_requires_atari_py

from .async_vector_env import AsyncVectorEnv


@pytest.fixture()
def allow_remote_getattr(monkeypatch):
    assert hasattr(AsyncVectorEnv, "allow_remote_getattr")
    monkeypatch.setattr(AsyncVectorEnv, "allow_remote_getattr", True)


@pytest.mark.parametrize("env_name, expected_obs_shape", [
    ("CartPole-v0", (4, )),
    param_requires_atari_py("Breakout-v0", (210, 160, 3)),
])
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_spaces(batch_size: int, env_name: str,
                expected_obs_shape: Tuple[int, ...]):
    env_fns = [partial(gym.make, env_name) for _ in range(batch_size)]
    env: AsyncVectorEnv

    expected_obs_batch_shape = (batch_size, *expected_obs_shape)

    with AsyncVectorEnv(env_fns=env_fns) as env:
        assert isinstance(env.observation_space, spaces.Box)
        assert env.observation_space.shape == expected_obs_batch_shape

        assert isinstance(env.action_space, spaces.Tuple)
        assert len(env.action_space.spaces) == batch_size
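As a point of comparison, here is a minimal sketch using the stock gym.vector API (standard gym, pre-gymnasium; not part of this test file) showing the same batched observation space that the test above asserts. Note that stock gym batches a Discrete action space into a MultiDiscrete, whereas the AsyncVectorEnv subclass tested here exposes a Tuple of single action spaces:

import gym
from gym.vector.utils import batch_space

# Synchronous variant just to keep the sketch simple; the spaces are the same.
env = gym.vector.make("CartPole-v0", num_envs=2, asynchronous=False)
assert env.observation_space.shape == (2, 4)  # (num_envs, *single_obs_shape)
assert env.action_space == batch_space(env.single_action_space, 2)
env.close()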
Example #2
                                         TransformObservation)
from sequoia.common.transforms import ChannelsFirstIfNeeded, Transforms
from sequoia.conftest import DummyEnvironment, param_requires_atari_py, xfail
from sequoia.utils import take
from sequoia.utils.logging_utils import get_logger

from .gym_dataloader import GymDataLoader
from .make_env import make_batched_env

logger = get_logger(__file__)


@pytest.mark.parametrize("batch_size", [1, 2, 5])
@pytest.mark.parametrize(
    "env_name",
    ["CartPole-v0", param_requires_atari_py("Breakout-v0")])
def test_spaces(env_name: str, batch_size: int):
    dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size))

    batched_obs_space = dataset.observation_space
    # NOTE: the VectorEnv class builds the 'batched' action space as a Tuple of
    # 'N' copies of the single action space, which seems a bit weird (see the
    # short batch_space sketch after this example).
    # batched_action_space = vector_env.action_space
    batched_action_space = batch_space(dataset.single_action_space, batch_size)

    dataloader_env = GymDataLoader(dataset, batch_size=batch_size)
    assert dataloader_env.observation_space == batched_obs_space
    assert dataloader_env.action_space == batched_action_space

    dataloader_env.reset()
    for observation_batch in take(dataloader_env, 3):
        if isinstance(observation_batch, Tensor):
            observation_batch = observation_batch.cpu().numpy()
        assert observation_batch in batched_obs_space

        actions = dataloader_env.action_space.sample()
        assert len(actions) == batch_size
        assert actions in batched_action_space

        rewards = dataloader_env.send(actions)
        assert len(rewards) == batch_size
        assert rewards in dataloader_env.reward_space
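For reference, a minimal standalone sketch (not part of these tests, and assuming the classic gym API used throughout this file) of what batch_space produces for a Discrete action space, next to the Tuple-based batching mentioned in the comment above:

from gym import spaces
from gym.vector.utils import batch_space

single = spaces.Discrete(2)          # CartPole's single-env action space
print(batch_space(single, 3))        # MultiDiscrete([2 2 2])
print(spaces.Tuple([single] * 3))    # Tuple(Discrete(2), Discrete(2), Discrete(2))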
Example #3
    batch_size = 4
    env = BatchedVectorEnv(
        [partial(gym.make, "CartPole-v0") for i in range(batch_size)]
    )
    env.reset()
    with env:
        for i in range(100):
            actions = env.action_space.sample()
            obs, reward, done, info = env.step(actions)
            env.render(mode="human")
            env.viewer.window


@pytest.mark.timeout(60)
@pytest.mark.parametrize(
    "env_name", ["CartPole-v0", "Pendulum-v0", param_requires_atari_py("Breakout-v0")]
)
def test_with_pixelobservationwrapper_before_batch(env_name: str):
    """ Test out what happens if we put the PixelObservationWrapper before the
    batching, i.e. in each of the environments.
    """
    batch_size = 5
    n_steps = 20
    n_workers = None

    from ..pixel_observation import PixelObservationWrapper

    def make_env():
        return PixelObservationWrapper(gym.make(env_name))

    setup_time, time_per_step = benchmark(
Example #4
            else:
                obs, reward, done, info = train_env.step(
                    train_env.action_space.sample())
            total_steps += 1
    assert total_steps == setting.steps_per_phase


@pytest.mark.timeout(60)
@pytest.mark.parametrize("batch_size", [None, 1, 3])
@pytest.mark.parametrize(
    "dataset, expected_obs_shape",
    [
        ("CartPole-v0", (3, 400, 600)),
        # param_requires_atari_py("Breakout-v0", (3, 210, 160)),
        param_requires_atari_py(
            "Breakout-v0",
            (1, 84, 84)),  # Since the Atari Preprocessing is added by default.
        # TODO: Add support for the duckietown env!
        # ("duckietown", (120, 160, 3)),
    ],
)
def test_check_iterate_and_step(
    dataset: str, expected_obs_shape: Tuple[int, ...], batch_size: int
):
    # TODO: Fix the default transforms, shouldn't necessarily have `to_tensor` in there.
    setting = IncrementalRLSetting(
        dataset=dataset,
        nb_tasks=5,
        train_transforms=[Transforms.to_tensor],
        val_transforms=[Transforms.to_tensor],
        test_transforms=[Transforms.to_tensor],
Example #5
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            env.reset()
        length = env.length
        lengths.append(length)
    assert not all(length == starting_length for length in lengths)


@pytest.mark.parametrize("batch_size", [None, 1, 3])
@pytest.mark.parametrize(
    "dataset, expected_obs_shape",
    [
        ("CartPole-v0", (3, 400, 600)),
        # param_requires_atari_py("Breakout-v0", (3, 210, 160)),
        param_requires_atari_py(
            "Breakout-v0", (3, 84, 84)
        ),  # Since the AtariWrapper gets added by default
        # ("duckietown", (120, 160, 3)),
    ],
)
def test_check_iterate_and_step(
    dataset: str, expected_obs_shape: Tuple[int, ...], batch_size: int
):
    """ Test that the observations are of the right type and shape, regardless
    of whether we iterate on the env by calling 'step' or by using it as a
    DataLoader.
    """
    setting = ContinualRLSetting(dataset=dataset)

    expected_obs_batch_shape = (batch_size, *expected_obs_shape)
    if batch_size is None:
Example #6
    env = BatchedVectorEnv(
        [partial(gym.make, "CartPole-v0") for i in range(batch_size)])
    env.reset()
    with env:
        for i in range(100):
            actions = env.action_space.sample()
            obs, reward, done, info = env.step(actions)
            env.render(mode="human")
            env.viewer.window


@pytest.mark.timeout(60)
@pytest.mark.parametrize(
    "env_name",
    ["CartPole-v0", "Pendulum-v0",
     param_requires_atari_py("Breakout-v0")])
def test_with_pixelobservationwrapper_before_batch(env_name: str):
    """ Test out what happens if we put the PixelObservationWrapper before the
    batching, i.e. in each of the environments.
    """
    batch_size = 5
    n_steps = 20
    n_workers = None

    from ..pixel_observation import PixelObservationWrapper

    def make_env():
        return PixelObservationWrapper(gym.make(env_name))

    setup_time, time_per_step = benchmark(batch_size,
                                          n_workers,
Example #7
class TestGymDataLoader:
    # Grouping tests into a class so we can inherit from it in another test module,
    # for instance in the tests for the EnvironmentProxy class.
    GymDataLoader: ClassVar[Type[GymDataLoader]] = GymDataLoader

    @pytest.mark.parametrize("batch_size", [1, 2, 5])
    @pytest.mark.parametrize(
        "env_name",
        ["CartPole-v0", param_requires_atari_py("Breakout-v0")])
    def test_spaces(self, env_name: str, batch_size: int):
        dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size))

        batched_obs_space = dataset.observation_space
        # NOTE: the VectorEnv class builds the 'batched' action space as a Tuple
        # of 'N' copies of the single action space, which seems a bit weird.
        # batched_action_space = vector_env.action_space
        batched_action_space = batch_space(dataset.single_action_space,
                                           batch_size)

        dataloader_env = self.GymDataLoader(dataset, batch_size=batch_size)
        assert dataloader_env.observation_space == batched_obs_space
        assert dataloader_env.action_space == batched_action_space

        dataloader_env.reset()
        for observation_batch in take(dataloader_env, 3):
            if isinstance(observation_batch, Tensor):
                observation_batch = observation_batch.cpu().numpy()
            assert observation_batch in batched_obs_space

            actions = dataloader_env.action_space.sample()
            assert len(actions) == batch_size
            assert actions in batched_action_space

            rewards = dataloader_env.send(actions)
            assert len(rewards) == batch_size
            assert rewards in dataloader_env.reward_space

    @pytest.mark.parametrize("batch_size", [None, 1, 2, 5])
    @pytest.mark.parametrize(
        "env_name",
        ["CartPole-v0", param_requires_atari_py("Breakout-v0")])
    def test_max_steps_is_respected(self, env_name: str, batch_size: int):
        max_steps = 5
        env_name = "CartPole-v0"
        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env)
        from sequoia.common.gym_wrappers.action_limit import ActionLimit
        dataset = ActionLimit(dataset, max_steps=max_steps * (batch_size or 1))
        env: GymDataLoader = self.GymDataLoader(dataset)
        env.reset()
        i = 0
        for i, obs in enumerate(env):
            assert obs in env.observation_space
            assert i < max_steps, f"Max steps should have been respected: {i}"
            env.send(env.action_space.sample())
        assert i == max_steps - 1
        env.close()

    @pytest.mark.parametrize("batch_size", [None, 1, 2, 5])
    @pytest.mark.parametrize("seed", [None, 123, 456])
    # @pytest.mark.parametrize(
    #     "env_name", ["CartPole-v0", param_requires_atari_py("Breakout-v0")]
    # )
    def test_multiple_epochs_works(self, batch_size: Optional[int],
                                   seed: Optional[int]):
        epochs = 3
        max_steps_per_episode = 10
        from gym.wrappers import TimeLimit
        from sequoia.conftest import DummyEnvironment
        from sequoia.common.gym_wrappers import AddDoneToObservation

        def env_fn():
            # FIXME: Using the DummyEnvironment for now since it's easier to debug with.
            # env = gym.make(env_name)
            env = DummyEnvironment()
            env = AddDoneToObservation(env)
            env = TimeLimit(env, max_episode_steps=max_steps_per_episode)
            return env

        # assert False, [env_fn(i).unwrapped for i in range(4)]
        # env = gym.vector.make(env_name, num_envs=(batch_size or 1))
        env = make_batched_env(env_fn, batch_size=batch_size)

        batched_env = env
        # from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit
        # env = EpisodeLimit(env, max_episodes=epochs)
        from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors
        env = ConvertToFromTensors(env)

        env = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        env: GymDataLoader = self.GymDataLoader(env)
        # BUG: Seems to be a little bug in the shape of the items yielded by the env due
        # to the concat_fn of the DataLoader.
        # if batch_size and batch_size >= 1:
        #     assert False, (env.reset().shape, env.observation_space, next(iter(env)).shape)
        env.seed(seed)

        all_rewards = []
        with env:
            for epoch in range(epochs):
                for step, obs in enumerate(env):
                    print(f"'epoch' {epoch}, step {step}:, obs: {obs}")
                    assert obs in env.observation_space, obs.shape
                    assert (  # BUG: this assertion sometimes fails!
                        step < max_steps_per_episode
                    ), "Max steps per episode should have been respected."
                    rewards = env.send(env.action_space.sample())

                    if batch_size is None:
                        all_rewards.append(rewards)
                    else:
                        all_rewards.extend(rewards)

                # Since in the VectorEnv, 'episodes' are infinite, we must have
                # reached the limit of the number of steps, while in a single
                # environment, the episode might have been shorter.
                assert step <= max_steps_per_episode - 1

            assert epoch == epochs - 1

        if batch_size in [None, 1]:
            # Some episodes might last shorter than the max number of steps per episode,
            # therefore the total should be at most this much:
            assert len(all_rewards) <= epochs * max_steps_per_episode
        else:
            # The maximum number of steps per episode is set, but the env is vectorized,
            # so the number of 'total' rewards we get from all envs should be *exactly*
            # this much:
            assert len(all_rewards) == epochs * max_steps_per_episode * batch_size
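            # Worked example of the bound above: with epochs=3, max_steps_per_episode=10
            # and batch_size=2, the vector env auto-resets, so every step yields
            # batch_size rewards and the total is exactly 3 * 10 * 2 = 60, whereas a
            # single (non-vectorized) env could give fewer.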

    @pytest.mark.parametrize("batch_size", [1, 2, 5])
    @pytest.mark.parametrize("env_name",
                             [param_requires_atari_py("Breakout-v0")])
    def test_reward_isnt_always_one(self, env_name: str, batch_size: int):
        epochs = 3
        max_steps_per_episode = 100

        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        env: GymDataLoader = self.GymDataLoader(env=dataset)
        all_rewards = []
        with env:
            env.reset()
            for epoch in range(epochs):
                for i, batch in enumerate(env):
                    rewards = env.send(env.action_space.sample())
                    all_rewards.extend(rewards)

        assert all_rewards != np.ones(len(all_rewards)).tolist()

    @pytest.mark.parametrize("env_name", ["CartPole-v0"])
    @pytest.mark.parametrize("batch_size", [1, 2, 5, 10])
    def test_batched_state(self, env_name: str, batch_size: int):
        max_steps_per_episode = 10

        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        env: GymDataLoader = self.GymDataLoader(
            dataset,
            batch_size=batch_size,
        )
        with gym.make(env_name) as temp_env:
            state_shape = temp_env.observation_space.shape
            action_shape = temp_env.action_space.shape

        state_shape = (batch_size, *state_shape)
        action_shape = (batch_size, *action_shape)
        reward_shape = (batch_size, )

        state = env.reset()
        assert state.shape == state_shape
        env.seed(123)
        i = 0
        for obs_batch in take(env, 5):
            assert obs_batch.shape == state_shape

            random_actions = env.action_space.sample()
            assert torch.as_tensor(random_actions).shape == action_shape
            assert temp_env.action_space.contains(random_actions[0])

            reward = env.send(random_actions)
            assert reward.shape == reward_shape
            i += 1
        assert i == 5

    @pytest.mark.parametrize("env_name", ["CartPole-v0"])
    @pytest.mark.parametrize("batch_size", [1, 2, 5, 10])
    def test_batched_pixels(self, env_name: str, batch_size: int):
        max_steps_per_episode = 10

        wrappers = [PixelObservationWrapper]
        env = make_batched_env(env_name,
                               wrappers=wrappers,
                               batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        with gym.make(env_name) as temp_env:
            for wrapper in wrappers:
                temp_env = wrapper(temp_env)

            state_shape = temp_env.observation_space.shape
            action_shape = temp_env.action_space.shape

        state_shape = (batch_size, *state_shape)
        action_shape = (batch_size, *action_shape)
        reward_shape = (batch_size, )

        env = self.GymDataLoader(
            dataset,
            batch_size=batch_size,
        )
        assert isinstance(env.observation_space, spaces.Box)
        assert len(env.observation_space.shape) == 4
        assert env.observation_space.shape[0] == batch_size

        env.seed(1234)
        for i, batch in enumerate(env):
            assert len(batch) == batch_size

            if isinstance(batch, Tensor):
                batch = batch.cpu().numpy()
            assert batch in env.observation_space

            random_actions = env.action_space.sample()
            assert torch.as_tensor(random_actions).shape == action_shape
            assert temp_env.action_space.contains(random_actions[0])

            reward = env.send(random_actions)
            assert reward.shape == reward_shape