Example #1
def test_observation_wrapper_applied_to_passive_environment():
    """ Test that when we apply a gym wrapper to a PassiveEnvironment, it also
    affects the observations / actions / rewards produced when iterating on the
    env.
    """
    batch_size = 5

    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)
    env = PassiveEnvironment(
        dataset, n_classes=10, batch_size=batch_size, observation_space=obs_space,
    )

    assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space == env.action_space

    env.seed(123)

    check_env(env)

    # Apply a transformation that changes the observation space.
    env = TransformObservation(env=env, f=Compose([Transforms.resize_64x64]))
    assert env.observation_space == Image(0, 1, (batch_size, 3, 64, 64))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    env.seed(123)
    check_env(env)

    env.close()
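The test above only checks the spaces. As a hedged sketch (not part of the original test), the docstring's claim that *iteration* is also affected could be checked like this, assuming the wrapped env still yields `(x, y)` batches the way the unwrapped `PassiveEnvironment` does in Example #15:

env = TransformObservation(
    PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size,
                       observation_space=obs_space),
    f=Compose([Transforms.resize_64x64]),
)
for x, _ in env:
    assert x.shape == (batch_size, 3, 64, 64)  # resized by the wrapper
    break
env.close()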
Example #2
def test_passive_environment_without_pretend_to_be_active():
    """ Test the gym.Env-style interaction with a PassiveEnvironment.
    """
    batch_size = 5
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    dataset = MNIST("data",
                    transform=Compose(
                        [Transforms.to_tensor, Transforms.three_channels]))
    max_samples = 100
    dataset = Subset(dataset, list(range(max_samples)))

    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)
    env = PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=False,
    )
    assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
    assert env.action_space.shape == (batch_size, )
    assert env.reward_space == env.action_space
    env.seed(123)
    obs = env.reset()
    assert obs in env.observation_space

    obs, reward, done, info = env.step(env.action_space.sample())
    assert reward is not None

    for i, (obs, reward) in enumerate(env):
        assert reward is not None
        other_reward = env.send(env.action_space.sample())
        assert (other_reward == reward).all()
    assert i == max_samples // batch_size - 1
Example #3
def test_transforms_get_propagated():
    for setting in [
        TaskIncrementalRLSetting(dataset="MetaMonsterKong-v0"),
        SettingProxy(TaskIncrementalRLSetting, dataset="MetaMonsterKong-v0"),
    ]:
        assert setting.observation_space.x == Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)
        setting.transforms.append(Transforms.to_tensor)
        setting.transforms.append(Transforms.resize_32x32)
        # TODO: The observation space doesn't update directly in RL whenever the
        # transforms are changed.
        assert setting.observation_space.x == Image(0, 1, shape=(3, 32, 32))
        assert setting.train_dataloader().reset().x.shape == (3, 32, 32)
Example #4
def test_compose_on_image_space():
    in_space = Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)
    transform = Compose([Transforms.to_tensor, Transforms.three_channels])
    expected = Image(0, 1., shape=(3, 64, 64), dtype=np.float32) 
    actual = transform(in_space)
   
    assert actual == expected
    env = gym.make("MetaMonsterKong-v0")
    assert env.observation_space == gym.spaces.Box(0, 255, (64, 64, 3), np.uint8)
    assert env.observation_space == in_space
    wrapped_env = TransformObservation(env, transform)
    assert wrapped_env.observation_space == expected
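Example #4 applies the `Compose` to a *space*. For contrast, here is a minimal hedged sketch of the same transform applied to a sample, assuming `Transforms.to_tensor` behaves like torchvision's `ToTensor` (a `(H, W, C)` uint8 array becomes a `(C, H, W)` float32 tensor in `[0, 1]`):

import numpy as np
from sequoia.common.transforms import Compose, Transforms

transform = Compose([Transforms.to_tensor, Transforms.three_channels])
x = np.zeros((64, 64, 3), dtype=np.uint8)  # a dummy 64x64 RGB frame
y = transform(x)
assert y.shape == (3, 64, 64)  # channels-first, matching the transformed space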
Example #5
def test_rl_track_setting_is_correct():
    setting = SettingProxy(
        IncrementalRLSetting,
        "rl_track",
    )
    assert setting.nb_tasks == 8
    assert setting.dataset == "MetaMonsterKong-v0"
    assert setting.observation_space == NamedTupleSpace(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=Sparse(spaces.Discrete(8)),
    )
    assert setting.action_space == spaces.Discrete(6)
    # TODO: The reward range of the MetaMonsterKongEnv is (0, 50), which seems wrong.
    # This isn't really a big deal though.
    # assert setting.reward_space == spaces.Box(0, 100, shape=(), dtype=np.float32)
    assert setting.steps_per_task == 200_000
    assert setting.test_steps_per_task == 10_000
    assert setting.known_task_boundaries_at_train_time is True
    assert setting.known_task_boundaries_at_test_time is False
    assert setting.monitor_training_performance is True
    assert setting.train_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
    assert setting.val_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
    assert setting.test_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]

    train_env = setting.train_dataloader()
    assert train_env.observation_space == NamedTupleSpace(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=spaces.Discrete(8),
    )
    assert train_env.reset() in train_env.observation_space

    valid_env = setting.val_dataloader()
    assert valid_env.observation_space == NamedTupleSpace(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=spaces.Discrete(8),
    )

    # IDEA: Prevent submissions from calling the test_dataloader method or accessing the
    # test_env / test_dataset property?
    with pytest.raises(RuntimeError):
        test_env = setting.test_dataloader()
        test_env.reset()

    with pytest.raises(RuntimeError):
        test_env = setting.test_env
        test_env.reset()
Example #6
def test_is_proxy_to(use_wrapper: bool):
    import numpy as np
    from sequoia.common.transforms import Compose, Transforms

    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    from sequoia.common.spaces import Image
    from torchvision.datasets import MNIST

    batch_size = 32
    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)

    env_type = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment
    env: Iterable[Tuple[Tensor, Tensor]] = env_type(
        dataset,
        batch_size=batch_size,
        n_classes=10,
        observation_space=obs_space,
    )
    if use_wrapper:
        assert isinstance(env, EnvironmentProxy)
        assert issubclass(type(env), EnvironmentProxy)
        assert is_proxy_to(env, PassiveEnvironment)
    else:
        assert not is_proxy_to(env, PassiveEnvironment)
Example #7
def test_sl_track_setting_is_correct():
    setting = SettingProxy(
        ClassIncrementalSetting,
        "sl_track",
    )
    assert setting.nb_tasks == 12
    assert setting.dataset == "synbols"
    assert setting.observation_space == NamedTupleSpace(
        x=Image(0, 1, (3, 32, 32), dtype=np.float32),
        task_labels=spaces.Discrete(12),
    )
    assert setting.n_classes_per_task == 4
    assert setting.action_space == spaces.Discrete(48)
    assert setting.reward_space == spaces.Discrete(48)
    assert setting.known_task_boundaries_at_train_time is True
    assert setting.known_task_boundaries_at_test_time is False
    assert setting.monitor_training_performance is True
    assert setting.train_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
    assert setting.val_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
    assert setting.test_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
Example #8
    def test_dqn_monsterkong_adds_channel_first_transform(self):
        method = self.Method(**self.debug_kwargs)
        setting = IncrementalRLSetting(
            dataset="monsterkong",
            nb_tasks=2,
            steps_per_task=1_000,
            test_steps_per_task=1_000,
        )
        assert setting.train_max_steps == 2_000
        assert setting.max_steps == 2_000
        assert setting.test_steps == 2_000
        assert setting.nb_tasks == 2
        assert setting.observation_space.x == Image(0,
                                                    255,
                                                    shape=(64, 64, 3),
                                                    dtype=np.uint8)
        assert setting.observation_space.task_labels.n == 2
        # assert setting.observation_space == TypedDictSpace(
        #     spaces={
        #         "x": Image(0, 255, shape=(64, 64, 3), dtype=np.uint8),
        #         "task_labels": Sparse(spaces.Discrete(2), sparsity=0.5),
        #         "done": Sparse(spaces.Box(False, True, (), dtype=np.bool), sparsity=1),
        #     },
        #     dtype=setting.Observations,
        # )
        assert setting.observation_space.dtype is setting.Observations
        assert setting.action_space == spaces.Discrete(6)  # monsterkong has 6 actions.

        # (Before the method gets to change the Setting):
        # By default the setting gives the same shape of obs as the underlying env.
        for env_method in [
                setting.train_dataloader,
                setting.val_dataloader,
                setting.test_dataloader,
        ]:
            print(f"Testing method {env_method.__name__}")
            with env_method() as env:
                reset_obs = env.reset()
                # TODO: Fix this so the 'x' space actually gets tensor support.
                # assert reset_obs in env.observation_space
                assert reset_obs.numpy() in env.observation_space
                assert reset_obs.x.shape == (64, 64, 3)

        # Let the Method configure itself on the Setting:
        method.configure(setting)

        # (After the method gets to change the Setting):

        for env_method in [
                setting.train_dataloader,
                setting.val_dataloader,
                setting.test_dataloader,
        ]:
            with env_method() as env:
                reset_obs = env.reset()
                # Fix this numpy bug.
                assert reset_obs.numpy() in env.observation_space
                assert reset_obs.x.shape == (64, 64, 3)
Example #9
def test_transforms_get_propagated():
    for setting in [
        TaskIncrementalRLSetting(dataset="cartpole"), 
        SettingProxy(TaskIncrementalRLSetting, dataset="cartpole"),
    ]:
        assert setting.observation_space.x == Image(0, 1, shape=(3, 400, 600))
        setting.train_transforms.append(Transforms.resize_64x64)
        # TODO: The observation space doesn't update directly in RL whenever the
        # transforms are changed.
        # assert setting.observation_space.x == Image(0, 1, shape=(3, 64, 64))
        assert setting.train_dataloader().reset().x.shape == (3, 64, 64)
Example #10
def test_multitask_setting_test_env():
    setting = MultiTaskSetting(dataset="mnist")

    assert setting.phases == 1
    assert setting.nb_tasks == 5
    assert setting.observation_space == NamedTupleSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32), task_labels=Discrete(5)
    )
    assert setting.action_space == Discrete(10)

    # FIXME: Wait, actually, this test environment, will it be shuffled, or not?
    with setting.test_dataloader(batch_size=32, num_workers=0) as test_env:
        check_is_multitask_env(test_env, has_rewards=False)
Example #11
def test_domain_incremental_mnist_setup():
    setting = DomainIncrementalSLSetting(dataset="mnist", increment=2,)
    setting.prepare_data(data_dir="data")
    setting.setup()
    assert setting.observation_space == TypedDictSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32),
        task_labels=Discrete(5),
        dtype=setting.Observations,
    )
    assert setting.observation_space.dtype == setting.Observations
    assert setting.action_space == spaces.Discrete(2)
    assert setting.reward_space == spaces.Discrete(2)

    for i in range(setting.nb_tasks):
        setting.current_task_id = i
        batch_size = 5
        train_loader = setting.train_dataloader(batch_size=batch_size)

        for j, (observations, rewards) in enumerate(
            itertools.islice(train_loader, 100)
        ):
            x = observations.x
            t = observations.task_labels
            y = rewards.y
            print(i, j, y, t)
            assert x.shape == (batch_size, 3, 28, 28)
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
            assert all(t == i)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            rewards_ = train_loader.send([4 for _ in range(batch_size)])
            assert (rewards.y == rewards_.y).all()

        train_loader.close()

        test_loader = setting.test_dataloader(batch_size=batch_size)
        for j, (observations, rewards) in enumerate(itertools.islice(test_loader, 100)):
            assert rewards is None

            x = observations.x
            t = observations.task_labels
            assert t is None
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            rewards = test_loader.send([4 for _ in range(batch_size)])
            assert rewards is not None
            y = rewards.y
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
Example #12
def test_dqn_monsterkong_adds_channel_first_transform():
    method = DQNMethod()
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    assert setting.max_steps == 2_000
    assert setting.test_steps == 2_000
    assert setting.nb_tasks == 2
    assert setting.observation_space == NamedTupleSpace(
        spaces={
            "x": Image(0, 1, shape=(3, 64, 64), dtype=np.float32),
            "task_labels": Sparse(spaces.Discrete(2)),
        },
        dtype=setting.Observations,
    )
    assert setting.action_space == spaces.Discrete(6)  # monsterkong has 6 actions.

    # (Before the method gets to change the Setting):
    # By default the setting gives the same shape of obs as the underlying env.
    for env_method in [
            setting.train_dataloader,
            setting.val_dataloader,
            setting.test_dataloader,
    ]:
        print(f"Testing method {env_method.__name__}")
        with env_method() as env:
            reset_obs = env.reset()
            # TODO: Fix this so the 'x' space actually gets tensor support.
            # assert reset_obs in env.observation_space
            assert reset_obs.numpy() in env.observation_space
            assert reset_obs.x.shape == (3, 64, 64)

    # Let the Method configure itself on the Setting:
    method.configure(setting)

    # (After the method gets to change the Setting):

    for env_method in [
            setting.train_dataloader,
            setting.val_dataloader,
            setting.test_dataloader,
    ]:
        with env_method() as env:
            reset_obs = env.reset()
            # Fix this numpy bug.
            assert reset_obs.numpy() in env.observation_space
            assert reset_obs.x.shape == (64, 64, 3)
Example #13
def test_domain_incremental_mnist_setup():
    setting = DomainIncrementalSetting(
        dataset="mnist",
        increment=2,
    )
    setting.prepare_data(data_dir="data")
    setting.setup()
    assert setting.observation_space == NamedTupleSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32), task_labels=Discrete(5))

    for i in range(setting.nb_tasks):
        setting.current_task_id = i
        batch_size = 5
        train_loader = setting.train_dataloader(batch_size=batch_size)

        for j, (observations,
                rewards) in enumerate(itertools.islice(train_loader, 100)):
            x = observations.x
            t = observations.task_labels
            y = rewards.y
            print(i, j, y, t)
            assert x.shape == (batch_size, 3, 28, 28)
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
            assert all(t == i)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            reward = train_loader.send([4 for _ in range(batch_size)])
            # TODO: Why are we fine with getting `None` as the reward here? Is it
            # because we're somehow setting it to be ``
            assert reward is None

        train_loader.close()

        test_loader = setting.test_dataloader(batch_size=batch_size)
        for j, (observations,
                rewards) in enumerate(itertools.islice(test_loader, 100)):
            assert rewards is None

            x = observations.x
            t = observations.task_labels
            assert t is None
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            rewards = test_loader.send([4 for _ in range(batch_size)])
            assert rewards is not None
            y = rewards.y
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
Example #14
def test_multitask_setting():
    setting = MultiTaskSetting(dataset="mnist")

    assert setting.phases == 1
    assert setting.nb_tasks == 5
    assert setting.observation_space == NamedTupleSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32), task_labels=Discrete(5)
    )
    assert setting.action_space == Discrete(10)

    with setting.train_dataloader(batch_size=32, num_workers=0) as train_env:
        check_is_multitask_env(train_env, has_rewards=True)

    with setting.val_dataloader(batch_size=32, num_workers=0) as val_env:
        check_is_multitask_env(val_env, has_rewards=True)
Example #15
def test_passive_environment_as_dataloader():
    batch_size = 1
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)

    env: Iterable[Tuple[Tensor, Tensor]] = PassiveEnvironment(
        dataset, batch_size=batch_size, n_classes=10, observation_space=obs_space,
    )

    for x, y in env:
        assert x.shape == (batch_size, 3, 28, 28)
        x = x.permute(0, 2, 3, 1)
        assert y.tolist() == [5]
        break
Example #16
    def test_monsterkong(self, state: bool):
        """ Checks that the MonsterKong env works fine with pixel and state input.
        """
        setting = self.Setting(
            dataset="StateMetaMonsterKong-v0"
            if state else "PixelMetaMonsterKong-v0",
            # force_state_observations=state,
            # force_pixel_observations=(not state),
            nb_tasks=5,
            train_max_steps=500,
            test_max_steps=500,
            # steps_per_task=100,
            # test_steps_per_task=100,
            train_transforms=[],
            test_transforms=[],
            val_transforms=[],
            max_episode_steps=10,
        )

        if state:
            # State-based monsterkong: We observe a flattened version of the game state
            # (20 x 20 grid + player cell and goal cell, IIRC.)
            assert setting.observation_space.x == spaces.Box(
                0, 292, (402,), np.int16
            ), setting._temp_train_env.observation_space
        else:
            assert setting.observation_space.x == Image(
                0, 255, (64, 64, 3), np.uint8)

        if setting.task_labels_at_test_time:
            assert setting.observation_space.task_labels == spaces.Discrete(5)
        else:
            assert setting.task_labels_at_train_time
            assert setting.observation_space.task_labels == Sparse(
                spaces.Discrete(5),
                sparsity=0.5,  # 0.5 since we have task labels at train time.
            )

        assert setting.test_max_steps == 500
        with setting.train_dataloader() as env:
            obs = env.reset()
            assert obs in setting.observation_space

        method = DummyMethod()
        results = setting.apply(method)

        self.validate_results(setting, method, results)
Example #17
def test_passive_environment_needs_actions_to_be_sent():
    """ Test the 'active dataloader' style interaction.
    """
    batch_size = 10
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    dataset = MNIST("data",
                    transform=Compose(
                        [Transforms.to_tensor, Transforms.three_channels]))
    max_samples = 105
    dataset = Subset(dataset, list(range(max_samples)))

    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)
    env = PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=True,
        strict=True,
    )

    with pytest.raises(RuntimeError):
        for i, (obs, _) in enumerate(env):
            pass

    env = PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=True,
    )
    for i, (obs, _) in enumerate(env):
        assert isinstance(obs, Tensor)
        action = env.action_space.sample()[:obs.shape[0]]
        rewards = env.send(action)
        assert rewards is not None
        assert rewards.shape[0] == action.shape[0]
Example #18
def test_issue_204():
    """ Test that reproduces the issue #204, which was that some zombie processes
    appeared to be created when iterating using an EnvironmentProxy.
    
    The issue appears to have been caused by calling `self.__environment.reset()` in
    `__iter__`, which I think caused another dataloader iterator to be created?
    """
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])

    batch_size = 2048
    num_workers = 12

    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)

    current_process = psutil.Process()
    print(
        f"Current process is using {current_process.num_threads()} threads, with "
        f"{len(current_process.children(recursive=True))} child processes.")
    starting_threads = current_process.num_threads()
    starting_processes = len(current_process.children(recursive=True))

    for use_wrapper in [False, True]:

        threads = current_process.num_threads()
        processes = len(current_process.children(recursive=True))
        assert threads == starting_threads
        assert processes == starting_processes

        env_type = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment
        env: Iterable[Tuple[Tensor, Tensor]] = env_type(
            dataset,
            batch_size=batch_size,
            n_classes=10,
            observation_space=obs_space,
            num_workers=num_workers,
            persistent_workers=True,
        )
        for i, _ in enumerate(env):
            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            assert threads == starting_threads + num_workers
            assert processes == starting_processes + num_workers
            print(f"Current process is using {threads} threads, with "
                  f" {processes} child processes.")

        for i, _ in enumerate(env):
            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            assert threads == starting_threads + num_workers
            assert processes == starting_processes + num_workers
            print(f"Current process is using {threads} threads, with "
                  f" {processes} child processes.")

        obs = env.reset()
        done = False
        while not done:
            obs, reward, done, info = env.step(env.action_space.sample())

            # env.render(mode="human")

            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            if not done:
                assert threads == starting_threads + num_workers
                assert processes == starting_processes + num_workers
                print(f"Current process is using {threads} threads, with "
                      f" {processes} child processes.")

        env.close()

        import time
        # Need to give it a second (or so) to cleanup.
        time.sleep(1)

        threads = current_process.num_threads()
        processes = len(current_process.children(recursive=True))
        assert threads == starting_threads
        assert processes == starting_processes
Example #19
    def __init__(
        self,
        dataset: Union[IterableDataset, Dataset],
        split_batch_fn: Callable[
            [Tuple[Any, ...]], Tuple[ObservationType, ActionType]
        ] = None,
        observation_space: gym.Space = None,
        action_space: gym.Space = None,
        reward_space: gym.Space = None,
        n_classes: int = None,
        pretend_to_be_active: bool = False,
        strict: bool = False,
        **kwargs,
    ):
        """Creates the DataLoader/Environment for the given dataset.
        
        Parameters
        ----------
        dataset : Union[IterableDataset, Dataset]
            The dataset to iterate on. Should ideally be indexable (a Map-style
            dataset).

        split_batch_fn : Callable[ [Tuple[Any, ...]], Tuple[ObservationType, ActionType] ], optional
            A function to call on each item in the dataset in order to split it into
            Observations and Rewards, by default None, in which case we assume that the
            dataset items are tuples of length 2.

        observation_space : gym.Space, optional
            The single (non-batched) observation space. Default to `None`, in which case
            this will try to infer the shape of the space using the first item in the
            dataset.

        action_space : gym.Space, optional
            The non-batched action space. Defaults to None, in which case the
            `n_classes` argument must be passed, and the action space is assumed to be
            discrete (i.e. that the loader is for a classification dataset).

        reward_space : gym.Space, optional
            The non-batched reward (label) space. Defaults to `None`, in which case it
            will be the same as the action space (as is the case in classification).

        n_classes : int, optional
            Number of classes in the dataset. Used in case `action_space` isn't passed.
            Defaults to `None`.

        pretend_to_be_active : bool, optional
            Whether to withhold the rewards (labels) from the batches when this
            is iterated on like a regular dataloader, and to only give them back
            after an action is received through the `send` method. False by
            default, in which case this behaves exactly like a normal dataloader
            when being iterated on.

            When False, the batches yielded by this dataloader are of the form
            `Tuple[Observations, Rewards]` (as usual in SL).
            However, when set to True, the batches will be `Tuple[Observations, None]`!
            Rewards are then returned by the environment when an action is passed to
            its `send` method.

        strict : bool, optional
            Whether to raise an error if an action is not sent through `send`
            before the next batch is requested, when `pretend_to_be_active` is
            True. Defaults to False.
            
        # Examples:
        ```python
        train_env = PassiveEnvironment(MNIST("data"), batch_size=32, n_classes=10)
        
        # The usual Dataloader-style:
        for x, y in train_env:
            # train as usual
            (...)
        
        # OpenAI Gym style:
        for episode in range(5):
            # NOTE: "episode" in RL is an "epoch" in SL:
            obs = train_env.reset()
            done = False
            while not done:
                actions = train_env.action_space.sample()
                obs, rewards, done, info = train_env.step(actions)
        ```
        
        """
        
        super().__init__(dataset=dataset, **kwargs)
        self.split_batch_fn = split_batch_fn

        # TODO: When the spaces aren't passed explicitly, this assumes a classification dataset.
        if not observation_space:
            # NOTE: Assuming min/max of 0 and 1 respectively, but could actually use
            # min_max of the dataset samples too.
            first_item = self.dataset[0]
            if isinstance(first_item, tuple):
                x, *_ = first_item
            else:
                assert isinstance(first_item, (np.ndarray, Tensor))
                x = first_item
            observation_space = Image(0.0, 1.0, x.shape)
        if not action_space:
            assert n_classes, "must pass either `action_space`, or `n_classes` for now"
            action_space = spaces.Discrete(n_classes)
        elif isinstance(action_space, spaces.Discrete):
            n_classes = action_space.n

        if not reward_space:
            # Assuming a classification dataset by default:
            # (action space = reward space = Discrete(n_classes))
            reward_space = action_space

        assert observation_space
        assert action_space
        assert reward_space

        self.single_observation_space: Space = observation_space 
        self.single_action_space: Space = action_space 
        self.single_reward_space: Space = reward_space

        if self.batch_size:
            observation_space = batch_space(observation_space, self.batch_size)
            action_space = batch_space(action_space, self.batch_size)
            reward_space = batch_space(reward_space, self.batch_size)
    
        self.observation_space: gym.Space = add_tensor_support(observation_space)
        self.action_space: gym.Space = add_tensor_support(action_space)
        self.reward_space: gym.Space = add_tensor_support(reward_space)

        self.pretend_to_be_active = pretend_to_be_active
        self._strict = strict
        self._reward_queue = deque(maxlen=10)

        self.n_classes: Optional[int] = n_classes
        self._iterator: Optional[_BaseDataLoaderIter] = None
        # NOTE: These here are never processed with self.observation or self.reward.
        self._previous_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._current_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._next_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._done: Optional[bool] = None
        self._closed: bool = False

        self._action: Optional[ActionType] = None

        # from gym.envs.classic_control.rendering import SimpleImageViewer
        self.viewer = None
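To make the `pretend_to_be_active` behaviour described in the docstring concrete, here is a minimal hedged sketch of the "active dataloader" interaction, mirroring the pattern used in Example #17 (reusing the `dataset`, `transforms`, and `obs_space` setup from the tests above):

env = PassiveEnvironment(
    MNIST("data", transform=transforms),
    n_classes=10,
    batch_size=32,
    observation_space=obs_space,
    pretend_to_be_active=True,
)
for observations, rewards in env:
    assert rewards is None  # labels are withheld while iterating
    actions = env.action_space.sample()[:observations.shape[0]]
    rewards = env.send(actions)  # labels come back once an action is sent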
Example #20
from ..passive_environment import Actions, PassiveEnvironment, Rewards
from ..passive_setting import PassiveSetting
from .class_incremental_results import ClassIncrementalResults
from .measure_performance_wrapper import MeasureSLPerformanceWrapper

logger = get_logger(__file__)

# NOTE: This dict reflects the observation space of the different datasets
# *BEFORE* any transforms are applied. The resulting property on the Setting is
# based on this 'base' observation space, passed through the transforms.
# TODO: Make it possible to automatically add tensor support if the dtype passed to a
# gym space is a `torch.dtype`.
tensor_space = add_tensor_support
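# A hedged sketch of what the note above means, using the mnist entry below:
# the Setting's observation space property would be computed roughly as
#     transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
#     observation_space = transforms(base_observation_spaces["mnist"])
# i.e. the base (1, 28, 28) space, passed through the configured transforms,
# giving the Image(0, 1, (3, 28, 28)) space seen in the Settings above.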

base_observation_spaces: Dict[str, Space] = {
    "mnist": tensor_space(Image(0, 1, shape=(1, 28, 28))),
    "fashionmnist": tensor_space(Image(0, 1, shape=(1, 28, 28))),
    "kmnist": tensor_space(Image(0, 1, shape=(1, 28, 28))),
    "emnist": tensor_space(Image(0, 1, shape=(1, 28, 28))),
    "qmnist": tensor_space(Image(0, 1, shape=(1, 28, 28))),
    "mnistfellowship": tensor_space(Image(0, 1, shape=(1, 28, 28))),
    # TODO: Determine the true bounds on the image values in cifar10.
    # Appears to be  ~= [-2.5, 2.5]
    "cifar10": tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
    "cifar100": tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
    "cifarfellowship": tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
    "imagenet100": tensor_space(Image(0, 1, shape=(224, 224, 3))),
    "imagenet1000": tensor_space(Image(0, 1, shape=(224, 224, 3))),
    "core50": tensor_space(Image(0, 1, shape=(224, 224, 3))),
    "core50-v2-79": tensor_space(Image(0, 1, shape=(224, 224, 3))),
    "core50-v2-196": tensor_space(Image(0, 1, shape=(224, 224, 3))),
Example #21
from sequoia.common.gym_wrappers.batch_env.tile_images import tile_images
from .objects import (
    Actions,
    ActionType,
    Observations,
    ObservationType,
    Rewards,
    RewardType,
)

logger = get_logger(__file__)

base_observation_spaces: Dict[str, Space] = {
    dataset_class.__name__.lower(): space
    for dataset_class, space in {
        MNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        FashionMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        KMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        EMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        QMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        MNISTFellowship: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        # TODO: Determine the true bounds on the image values in cifar10.
        # Appears to be  ~= [-2.5, 2.5]
        CIFAR10: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        CIFAR100: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        CIFARFellowship: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        ImageNet100: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        ImageNet1000: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50v2_79: tensor_space(Image(0, 1, shape=(224, 224, 3))),
Example #22
def test_monsterkong(task_labels_at_test_time: bool, state: bool):
    """ checks that the MonsterKong env works fine with monsterkong and state input. """
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        observe_state_directly=state,
        nb_tasks=5,
        steps_per_task=100,
        test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
        task_labels_at_test_time=task_labels_at_test_time,
        max_episode_steps=10,
    )

    if state:
        # State-based monsterkong: We observe a flattened version of the game state
        # (20 x 20 grid + player cell and goal cell, IIRC.)
        assert setting.observation_space.x == spaces.Box(
            0, 292, (402, ), np.int16)
    else:
        assert setting.observation_space.x == Image(0, 255, (64, 64, 3),
                                                    np.uint8)

    if task_labels_at_test_time:
        assert setting.observation_space.task_labels == spaces.Discrete(5)
    else:
        assert setting.observation_space.task_labels == Sparse(
            spaces.Discrete(5), sparsity=0.0)

    assert setting.test_steps == 500
    with setting.train_dataloader() as env:
        obs = env.reset()
        assert obs in setting.observation_space

    method = DummyMethod()
    _ = setting.apply(method)

    assert method.n_task_switches == 30
    if task_labels_at_test_time:
        assert method.received_task_ids == [
            0,
            *list(range(5)),
            1,
            *list(range(5)),
            2,
            *list(range(5)),
            3,
            *list(range(5)),
            4,
            *list(range(5)),
        ]
    else:
        assert method.received_task_ids == [
            0,
            *[None for _ in range(5)],
            1,
            *[None for _ in range(5)],
            2,
            *[None for _ in range(5)],
            3,
            *[None for _ in range(5)],
            4,
            *[None for _ in range(5)],
        ]
    assert method.received_while_training == [
        True,
        *[False for _ in range(5)],
        True,
        *[False for _ in range(5)],
        True,
        *[False for _ in range(5)],
        True,
        *[False for _ in range(5)],
        True,
        *[False for _ in range(5)],
    ]
Example #23
    # "rotatedmnist": (28, 28, 1),
    "core50": (3, 224, 224),
    "core50-v2-79": (3, 224, 224),
    "core50-v2-196": (3, 224, 224),
    "core50-v2-391": (3, 224, 224),
    "synbols": (3, 224, 224),
}

from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support

# NOTE: This dict reflects the observation space of the different datasets
# *BEFORE* any transforms are applied. The resulting property on the Setting is
# based on this 'base' observation space, passed through the transforms.

base_observation_spaces: Dict[str, Space] = {
    dataset_name: add_tensor_support(Image(0, 1, image_shape, np.float32))
    for dataset_name, image_shape in {
        "mnist": (1, 28, 28),
        "fashionmnist": (1, 28, 28),
        "kmnist": (28, 28, 1),
        "emnist": (28, 28, 1),
        "qmnist": (28, 28, 1),
        "mnistfellowship": (28, 28, 1),
        "cifar10": (32, 32, 3),
        "cifar100": (32, 32, 3),
        "cifarfellowship": (32, 32, 3),
        "imagenet100": (224, 224, 3),
        "imagenet1000": (224, 224, 3),
        # "permutedmnist": (28, 28, 1),
        # "rotatedmnist": (28, 28, 1),
        "core50": (224, 224, 3),
Example #24
def test_monsterkong_pixels(task_labels_at_test_time: bool):
    """ checks that the MonsterKong env works fine with monsterkong and state input. """
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        observe_state_directly=False,
        nb_tasks=5,
        steps_per_task=100,
        test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
        task_labels_at_test_time=task_labels_at_test_time,
        max_episode_steps=10,
    )
    assert setting.test_steps == 500
    assert setting.observation_space.x == Image(0, 255, (64, 64, 3), np.uint8)
    with setting.train_dataloader() as env:
        obs = env.reset()
        assert obs in setting.observation_space

    method = DummyMethod()
    results = setting.apply(method)

    assert method.n_task_switches == 30
    if task_labels_at_test_time:
        assert method.received_task_ids == [
            0,
            *list(range(5)),
            1,
            *list(range(5)),
            2,
            *list(range(5)),
            3,
            *list(range(5)),
            4,
            *list(range(5)),
        ]
    else:
        assert method.received_task_ids == [
            0,
            *[None for _ in range(5)],
            1,
            *[None for _ in range(5)],
            2,
            *[None for _ in range(5)],
            3,
            *[None for _ in range(5)],
            4,
            *[None for _ in range(5)],
        ]
    assert method.received_while_training == [
        True,
        *[False for _ in range(5)],
        True,
        *[False for _ in range(5)],
        True,
        *[False for _ in range(5)],
        True,
        *[False for _ in range(5)],
        True,
        *[False for _ in range(5)],
    ]