def test_observation_wrapper_applied_to_passive_environment():
    """ Test that when we apply a gym wrapper to a PassiveEnvironment, it also
    affects the observations / actions / rewards produced when iterating on the
    env.
    """
    batch_size = 5
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    dataset = MNIST("data", transform=transforms)
    # The 'base' (single-sample) observation space, before transforms.
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)
    # NOTE: Removed a stray `dataset.classes` expression here: it was a plain
    # attribute access whose result was discarded (a no-op statement).
    env = PassiveEnvironment(
        dataset, n_classes=10, batch_size=batch_size, observation_space=obs_space,
    )
    # Spaces are batched: leading dim is the batch size.
    assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space == env.action_space
    env.seed(123)
    check_env(env)

    # Apply a transformation that changes the observation space.
    env = TransformObservation(env=env, f=Compose([Transforms.resize_64x64]))
    assert env.observation_space == Image(0, 1, (batch_size, 3, 64, 64))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)
    env.seed(123)
    check_env(env)
    env.close()
def test_passive_environment_without_pretend_to_be_active():
    """ Test the gym.Env-style interaction with a PassiveEnvironment.

    With `pretend_to_be_active=False`, iterating yields (obs, reward) pairs and
    `send` just returns the same rewards again.
    """
    batch_size = 5
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    # NOTE: Reuse the `transforms` pipeline built above rather than rebuilding
    # an identical Compose inline (consistent with the other tests in this file).
    dataset = MNIST("data", transform=transforms)
    max_samples = 100
    dataset = Subset(dataset, list(range(max_samples)))
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)
    env = PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=False,
    )
    assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space == env.action_space
    env.seed(123)

    # gym.Env-style interaction:
    obs = env.reset()
    assert obs in env.observation_space
    obs, reward, done, info = env.step(env.action_space.sample())
    assert reward is not None

    # DataLoader-style interaction: rewards are yielded directly, and `send`
    # returns the same rewards regardless of the action.
    for i, (obs, reward) in enumerate(env):
        assert reward is not None
        other_reward = env.send(env.action_space.sample())
        assert (other_reward == reward).all()
    assert i == max_samples // batch_size - 1
def test_transforms_get_propagated():
    """Appending transforms to a Setting updates its observation space and the
    observations produced by its environments.

    NOTE(review): a function with this exact name also appears elsewhere in
    this chunk (a cartpole variant) — presumably they live in different test
    files; confirm, since two defs with the same name in one module would
    shadow each other.
    """
    # Check both the 'real' Setting and its proxy behave the same way.
    for setting in [
        TaskIncrementalRLSetting(dataset="MetaMonsterKong-v0"),
        SettingProxy(TaskIncrementalRLSetting, dataset="MetaMonsterKong-v0"),
    ]:
        # Base observation space of MetaMonsterKong: raw uint8 pixels.
        assert setting.observation_space.x == Image(0, 255, shape=(64, 64, 3),
                                                    dtype=np.uint8)
        setting.transforms.append(Transforms.to_tensor)
        setting.transforms.append(Transforms.resize_32x32)
        # TODO: The observation space doesn't update directly in RL whenever the
        # transforms are changed.
        assert setting.observation_space.x == Image(0, 1, shape=(3, 32, 32))
        assert setting.train_dataloader().reset().x.shape == (3, 32, 32)
def test_compose_on_image_space():
    """Applying a Compose of transforms to an `Image` space yields the
    transformed space, and wrapping an env with the same transform updates the
    env's observation space to match.
    """
    source_space = Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)
    pipeline = Compose([Transforms.to_tensor, Transforms.three_channels])
    target_space = Image(0, 1., shape=(3, 64, 64), dtype=np.float32)

    # Calling the pipeline on a space transforms the space itself.
    assert pipeline(source_space) == target_space

    env = gym.make("MetaMonsterKong-v0")
    assert env.observation_space == gym.spaces.Box(0, 255, (64, 64, 3), np.uint8)
    assert env.observation_space == source_space

    wrapped = TransformObservation(env, pipeline)
    assert wrapped.observation_space == target_space
def test_rl_track_setting_is_correct():
    """Check every attribute of the "rl_track" competition Setting preset:
    tasks, dataset, spaces, step budgets, task-boundary flags, transforms, and
    that the test environment is off-limits to submissions.
    """
    setting = SettingProxy(
        IncrementalRLSetting,
        "rl_track",
    )
    assert setting.nb_tasks == 8
    assert setting.dataset == "MetaMonsterKong-v0"
    # Task labels are Sparse at the Setting level (not always observable).
    assert setting.observation_space == NamedTupleSpace(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=Sparse(spaces.Discrete(8)),
    )
    assert setting.action_space == spaces.Discrete(6)
    # TODO: The reward range of the MetaMonsterKongEnv is (0, 50), which seems wrong.
    # This isn't really a big deal though.
    # assert setting.reward_space == spaces.Box(0, 100, shape=(), dtype=np.float32)
    assert setting.steps_per_task == 200_000
    assert setting.test_steps_per_task == 10_000
    assert setting.known_task_boundaries_at_train_time is True
    assert setting.known_task_boundaries_at_test_time is False
    assert setting.monitor_training_performance is True
    assert setting.train_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
    assert setting.val_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
    assert setting.test_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]

    # At train time task labels are fully observable (plain Discrete space).
    train_env = setting.train_dataloader()
    assert train_env.observation_space == NamedTupleSpace(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=spaces.Discrete(8),
    )
    assert train_env.reset() in train_env.observation_space

    valid_env = setting.val_dataloader()
    assert valid_env.observation_space == NamedTupleSpace(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=spaces.Discrete(8),
    )

    # IDEA: Prevent submissions from calling the test_dataloader method or accessing the
    # test_env / test_dataset property?
    with pytest.raises(RuntimeError):
        test_env = setting.test_dataloader()
        test_env.reset()
    with pytest.raises(RuntimeError):
        test_env = setting.test_env
        test_env.reset()
def test_is_proxy_to(use_wrapper: bool):
    """`is_proxy_to(env, PassiveEnvironment)` is True exactly when the env was
    built through the proxy wrapper, and False for a plain PassiveEnvironment.
    """
    import numpy as np
    from torchvision.datasets import MNIST
    from sequoia.common.spaces import Image
    from sequoia.common.transforms import Compose, Transforms

    pipeline = Compose([Transforms.to_tensor, Transforms.three_channels])
    batch_size = 32
    mnist = MNIST("data", transform=pipeline)
    image_space = pipeline(Image(0, 255, (1, 28, 28), np.uint8))

    environment_class = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment
    env: Iterable[Tuple[Tensor, Tensor]] = environment_class(
        mnist,
        batch_size=batch_size,
        n_classes=10,
        observation_space=image_space,
    )
    if use_wrapper:
        assert isinstance(env, EnvironmentProxy)
        assert issubclass(type(env), EnvironmentProxy)
        assert is_proxy_to(env, PassiveEnvironment)
    else:
        assert not is_proxy_to(env, PassiveEnvironment)
def test_sl_track_setting_is_correct():
    """Check the attributes of the "sl_track" competition Setting preset:
    tasks, dataset, spaces, task-boundary flags and default transforms.
    """
    setting = SettingProxy(
        ClassIncrementalSetting,
        "sl_track",
    )
    assert setting.nb_tasks == 12
    assert setting.dataset == "synbols"
    assert setting.observation_space == NamedTupleSpace(
        x=Image(0, 1, (3, 32, 32), dtype=np.float32),
        task_labels=spaces.Discrete(12),
    )
    # 48 classes split evenly over 12 tasks -> 4 classes per task.
    assert setting.n_classes_per_task == 4
    assert setting.action_space == spaces.Discrete(48)
    # Classification: reward (label) space equals the action space.
    assert setting.reward_space == spaces.Discrete(48)
    assert setting.known_task_boundaries_at_train_time is True
    assert setting.known_task_boundaries_at_test_time is False
    assert setting.monitor_training_performance is True
    assert setting.train_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
    assert setting.val_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
    assert setting.test_transforms == [
        Transforms.to_tensor, Transforms.three_channels
    ]
def test_dqn_monsterkong_adds_channel_first_transform(self):
    """Check the monsterkong IncrementalRLSetting's spaces before and after a
    (DQN-style) Method configures itself on it.

    NOTE(review): despite the test's name, both the pre- and post-configure
    assertions here expect channels-last (64, 64, 3) observations — confirm
    whether the channel-first transform is expected to show up in `reset_obs`.
    """
    method = self.Method(**self.debug_kwargs)
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    # 2 tasks x 1_000 steps each.
    assert setting.train_max_steps == 2_000
    assert setting.max_steps == 2_000
    assert setting.test_steps == 2_000
    assert setting.nb_tasks == 2
    assert setting.observation_space.x == Image(0, 255, shape=(64, 64, 3),
                                                dtype=np.uint8)
    assert setting.observation_space.task_labels.n == 2
    # assert setting.observation_space == TypedDictSpace(
    #     spaces={
    #         "x": Image(0, 255, shape=(64, 64, 3), dtype=np.uint8),
    #         "task_labels": Sparse(spaces.Discrete(2), sparsity=0.5),
    #         "done": Sparse(spaces.Box(False, True, (), dtype=np.bool), sparsity=1),
    #     },
    #     dtype=setting.Observations,
    # )
    assert setting.observation_space.dtype is setting.Observations
    assert setting.action_space == spaces.Discrete(
        6)  # monsterkong has 6 actions.

    # (Before the method gets to change the Setting):
    # By default the setting gives the same shape of obs as the underlying env.
    for env_method in [
        setting.train_dataloader,
        setting.val_dataloader,
        setting.test_dataloader,
    ]:
        print(f"Testing method {env_method.__name__}")
        with env_method() as env:
            reset_obs = env.reset()
            # TODO: Fix this so the 'x' space actually gets tensor support.
            # assert reset_obs in env.observation_space
            assert reset_obs.numpy() in env.observation_space
            assert reset_obs.x.shape == (64, 64, 3)

    # Let the Method configure itself on the Setting:
    method.configure(setting)

    # (After the method gets to change the Setting):
    for env_method in [
        setting.train_dataloader,
        setting.val_dataloader,
        setting.test_dataloader,
    ]:
        with env_method() as env:
            reset_obs = env.reset()
            # Fix this numpy bug.
            assert reset_obs.numpy() in env.observation_space
            assert reset_obs.x.shape == (64, 64, 3)
def test_transforms_get_propagated():
    """Appending a transform to `train_transforms` changes the observations
    produced by the training environment (cartpole variant).

    NOTE(review): shares its name with the MetaMonsterKong variant elsewhere in
    this chunk — presumably they live in separate test files; confirm.
    """
    for setting in [
        TaskIncrementalRLSetting(dataset="cartpole"),
        SettingProxy(TaskIncrementalRLSetting, dataset="cartpole"),
    ]:
        # Cartpole renders (400, 600) RGB frames, already scaled to [0, 1].
        assert setting.observation_space.x == Image(0, 1, shape=(3, 400, 600))
        setting.train_transforms.append(Transforms.resize_64x64)
        # TODO: The observation space doesn't update directly in RL whenever the
        # transforms are changed.
        # assert setting.observation_space.x == Image(0, 1, shape=(3, 64, 64))
        assert setting.train_dataloader().reset().x.shape == (3, 64, 64)
def test_multitask_setting_test_env():
    """The test environment of a MultiTaskSetting on MNIST exposes the expected
    multi-task spaces and behaves as a multi-task env (without rewards).
    """
    setting = MultiTaskSetting(dataset="mnist")
    assert setting.phases == 1
    assert setting.nb_tasks == 5

    expected_obs_space = NamedTupleSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32),
        task_labels=Discrete(5),
    )
    assert setting.observation_space == expected_obs_space
    assert setting.action_space == Discrete(10)

    # FIXME: Wait, actually, this test environment, will it be shuffled, or not?
    with setting.test_dataloader(batch_size=32, num_workers=0) as test_env:
        check_is_multitask_env(test_env, has_rewards=False)
def test_domain_incremental_mnist_setup():
    """Check the spaces and the train/test dataloader behavior of a
    DomainIncrementalSLSetting on MNIST with 2 classes per task (5 tasks).

    Train loaders yield (observations, rewards) with task labels; test loaders
    withhold both task labels and rewards until an action is sent.
    """
    setting = DomainIncrementalSLSetting(dataset="mnist", increment=2,)
    setting.prepare_data(data_dir="data")
    setting.setup()
    assert setting.observation_space == TypedDictSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32),
        task_labels=Discrete(5),
        dtype=setting.Observations,
    )
    assert setting.observation_space.dtype == setting.Observations
    # Domain-incremental: actions are within-task labels (2 classes per task).
    assert setting.action_space == spaces.Discrete(2)
    assert setting.reward_space == spaces.Discrete(2)
    for i in range(setting.nb_tasks):
        setting.current_task_id = i
        batch_size = 5
        train_loader = setting.train_dataloader(batch_size=batch_size)
        # Only look at the first 100 batches per task.
        for j, (observations, rewards) in enumerate(
            itertools.islice(train_loader, 100)
        ):
            x = observations.x
            t = observations.task_labels
            y = rewards.y
            print(i, j, y, t)
            assert x.shape == (batch_size, 3, 28, 28)
            # Labels are relabeled within the task: 0 <= y < n_classes_per_task.
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
            assert all(t == i)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)
            # Sending an action returns the same rewards already yielded above.
            rewards_ = train_loader.send([4 for _ in range(batch_size)])
            assert (rewards.y == rewards_.y).all()
        train_loader.close()

        test_loader = setting.test_dataloader(batch_size=batch_size)
        for j, (observations, rewards) in enumerate(itertools.islice(test_loader, 100)):
            # The test loader withholds rewards and task labels.
            assert rewards is None
            x = observations.x
            t = observations.task_labels
            assert t is None
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)
            # Rewards are only given back once an action has been sent.
            rewards = test_loader.send([4 for _ in range(batch_size)])
            assert rewards is not None
            y = rewards.y
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
def test_dqn_monsterkong_adds_channel_first_transform():
    """Check that configuring a DQNMethod on the monsterkong Setting changes the
    observation layout: channels-first (3, 64, 64) before `configure`,
    channels-last (64, 64, 3) after.
    """
    method = DQNMethod()
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    # 2 tasks x 1_000 steps each.
    assert setting.max_steps == 2_000
    assert setting.test_steps == 2_000
    assert setting.nb_tasks == 2
    assert setting.observation_space == NamedTupleSpace(
        spaces={
            "x": Image(0, 1, shape=(3, 64, 64), dtype=np.float32),
            "task_labels": Sparse(spaces.Discrete(2)),
        },
        dtype=setting.Observations,
    )
    assert setting.action_space == spaces.Discrete(
        6)  # monsterkong has 6 actions.

    # (Before the method gets to change the Setting):
    # By default the setting gives the same shape of obs as the underlying env.
    for env_method in [
        setting.train_dataloader,
        setting.val_dataloader,
        setting.test_dataloader,
    ]:
        print(f"Testing method {env_method.__name__}")
        with env_method() as env:
            reset_obs = env.reset()
            # TODO: Fix this so the 'x' space actually gets tensor support.
            # assert reset_obs in env.observation_space
            assert reset_obs.numpy() in env.observation_space
            assert reset_obs.x.shape == (3, 64, 64)

    # Let the Method configure itself on the Setting:
    method.configure(setting)

    # (After the method gets to change the Setting):
    for env_method in [
        setting.train_dataloader,
        setting.val_dataloader,
        setting.test_dataloader,
    ]:
        with env_method() as env:
            reset_obs = env.reset()
            # Fix this numpy bug.
            assert reset_obs.numpy() in env.observation_space
            # After configure, observations come out channels-last.
            assert reset_obs.x.shape == (64, 64, 3)
def test_domain_incremental_mnist_setup():
    """Check spaces and dataloader behavior of a DomainIncrementalSetting on
    MNIST with 2 classes per task (5 tasks).

    NOTE(review): shares its name with the DomainIncrementalSLSetting variant
    elsewhere in this chunk — presumably different test files; confirm.
    """
    setting = DomainIncrementalSetting(
        dataset="mnist",
        increment=2,
    )
    setting.prepare_data(data_dir="data")
    setting.setup()
    assert setting.observation_space == NamedTupleSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32), task_labels=Discrete(5))
    for i in range(setting.nb_tasks):
        setting.current_task_id = i
        batch_size = 5
        train_loader = setting.train_dataloader(batch_size=batch_size)
        # Only look at the first 100 batches per task.
        for j, (observations, rewards) in enumerate(
                itertools.islice(train_loader, 100)):
            x = observations.x
            t = observations.task_labels
            y = rewards.y
            print(i, j, y, t)
            assert x.shape == (batch_size, 3, 28, 28)
            # Labels are relabeled within the task: 0 <= y < n_classes_per_task.
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
            assert all(t == i)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)
            reward = train_loader.send([4 for _ in range(batch_size)])
            # TODO: Why are we fine with getting `None` as the reward here? Is it
            # because we're somehow setting it to be ``
            assert reward is None
        train_loader.close()

        test_loader = setting.test_dataloader(batch_size=batch_size)
        for j, (observations, rewards) in enumerate(
                itertools.islice(test_loader, 100)):
            # The test loader withholds rewards and task labels.
            assert rewards is None
            x = observations.x
            t = observations.task_labels
            assert t is None
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)
            # Rewards are only given back once an action has been sent.
            rewards = test_loader.send([4 for _ in range(batch_size)])
            assert rewards is not None
            y = rewards.y
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
def test_multitask_setting():
    """A MultiTaskSetting on MNIST has a single phase covering 5 tasks, and its
    train/val environments behave as multi-task envs (with rewards).
    """
    setting = MultiTaskSetting(dataset="mnist")
    assert setting.phases == 1
    assert setting.nb_tasks == 5

    expected_obs_space = NamedTupleSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32),
        task_labels=Discrete(5),
    )
    assert setting.observation_space == expected_obs_space
    assert setting.action_space == Discrete(10)

    for dataloader_fn in (setting.train_dataloader, setting.val_dataloader):
        with dataloader_fn(batch_size=32, num_workers=0) as environment:
            check_is_multitask_env(environment, has_rewards=True)
def test_passive_environment_as_dataloader():
    """A PassiveEnvironment can be iterated exactly like a plain DataLoader,
    yielding (image batch, label batch) pairs with the transforms applied.
    """
    batch_size = 1
    pipeline = Compose([Transforms.to_tensor, Transforms.three_channels])
    dataset = MNIST("data", transform=pipeline)
    image_space = pipeline(Image(0, 255, (1, 28, 28), np.uint8))
    env: Iterable[Tuple[Tensor, Tensor]] = PassiveEnvironment(
        dataset, batch_size=batch_size, n_classes=10, observation_space=image_space,
    )
    # Only inspect the first batch.
    images, labels = next(iter(env))
    assert images.shape == (batch_size, 3, 28, 28)
    images = images.permute(0, 2, 3, 1)
    # First MNIST training sample is a '5'.
    assert labels.tolist() == [5]
def test_monsterkong(self, state: bool):
    """ Checks that the MonsterKong env works fine with pixel and state input. """
    setting = self.Setting(
        dataset="StateMetaMonsterKong-v0" if state else "PixelMetaMonsterKong-v0",
        # force_state_observations=state,
        # force_pixel_observations=(not state),
        nb_tasks=5,
        train_max_steps=500,
        test_max_steps=500,
        # steps_per_task=100,
        # test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
        max_episode_steps=10,
    )
    if state:
        # State-based monsterkong: We observe a flattened version of the game state
        # (20 x 20 grid + player cell and goal cell, IIRC.)
        assert setting.observation_space.x == spaces.Box(
            0, 292, (402, ), np.int16), setting._temp_train_env.observation_space
    else:
        # Pixel-based monsterkong: raw uint8 frames.
        assert setting.observation_space.x == Image(
            0, 255, (64, 64, 3), np.uint8)
    if setting.task_labels_at_test_time:
        assert setting.observation_space.task_labels == spaces.Discrete(5)
    else:
        assert setting.task_labels_at_train_time
        assert setting.observation_space.task_labels == Sparse(
            spaces.Discrete(5),
            sparsity=0.5,  # 0.5 since we have task labels at train time.
        )
    assert setting.test_max_steps == 500
    with setting.train_dataloader() as env:
        obs = env.reset()
        assert obs in setting.observation_space
    # Run a dummy method end-to-end and validate the results it produced.
    method = DummyMethod()
    results = setting.apply(method)
    self.validate_results(setting, method, results)
def test_passive_environment_needs_actions_to_be_sent():
    """ Test the 'active dataloader' style interaction.

    With `pretend_to_be_active=True` and `strict=True`, iterating without
    sending an action for each batch raises a RuntimeError. Without `strict`,
    iterating works and `send(action)` returns the withheld rewards.
    """
    batch_size = 10
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    # NOTE: Reuse the `transforms` pipeline built above rather than rebuilding
    # an identical Compose inline (consistent with the other tests in this file).
    dataset = MNIST("data", transform=transforms)
    # 105 samples: the last batch is partial (5 items), hence the slicing of
    # the sampled actions below.
    max_samples = 105
    dataset = Subset(dataset, list(range(max_samples)))
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)
    env = PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=True,
        strict=True,
    )
    # Strict mode: iterating without sending actions is an error.
    with pytest.raises(RuntimeError):
        for i, (obs, _) in enumerate(env):
            pass

    env = PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=True,
    )
    for i, (obs, _) in enumerate(env):
        assert isinstance(obs, Tensor)
        # Trim the sampled action to the (possibly partial) batch size.
        action = env.action_space.sample()[:obs.shape[0]]
        rewards = env.send(action)
        assert rewards is not None
        assert rewards.shape[0] == action.shape[0]
def test_issue_204():
    """ Test that reproduces the issue #204, which was that some zombie
    processes appeared to be created when iterating using an EnvironmentProxy.

    The issue appears to have been caused by calling `self.__environment.reset()`
    in `__iter__`, which I think caused another dataloader iterator to be
    created?
    """
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    batch_size = 2048
    num_workers = 12
    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)

    # Record baseline thread/child-process counts before any env is created.
    current_process = psutil.Process()
    print(
        f"Current process is using {current_process.num_threads()} threads, with "
        f" {len(current_process.children(recursive=True))} child processes.")
    starting_threads = current_process.num_threads()
    starting_processes = len(current_process.children(recursive=True))

    for use_wrapper in [False, True]:
        # At the start of each round, all workers from the previous round must
        # have been cleaned up (no zombies).
        threads = current_process.num_threads()
        processes = len(current_process.children(recursive=True))
        assert threads == starting_threads
        assert processes == starting_processes

        env_type = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment
        env: Iterable[Tuple[Tensor, Tensor]] = env_type(
            dataset,
            batch_size=batch_size,
            n_classes=10,
            observation_space=obs_space,
            num_workers=num_workers,
            persistent_workers=True,
        )
        # First full pass: exactly `num_workers` extra threads/processes.
        for i, _ in enumerate(env):
            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            assert threads == starting_threads + num_workers
            assert processes == starting_processes + num_workers
            print(f"Current process is using {threads} threads, with "
                  f" {processes} child processes.")
        # Second pass: persistent workers are reused, counts stay the same.
        for i, _ in enumerate(env):
            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            assert threads == starting_threads + num_workers
            assert processes == starting_processes + num_workers
            print(f"Current process is using {threads} threads, with "
                  f" {processes} child processes.")
        # gym-style interaction should not leak workers either.
        obs = env.reset()
        done = False
        while not done:
            obs, reward, done, info = env.step(env.action_space.sample())
            # env.render(mode="human")
            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            if not done:
                assert threads == starting_threads + num_workers
                assert processes == starting_processes + num_workers
                print(f"Current process is using {threads} threads, with "
                      f" {processes} child processes.")
        env.close()
        import time
        # Need to give it a second (or so) to cleanup.
        time.sleep(1)
        # After close + grace period, counts are back to the baseline.
        threads = current_process.num_threads()
        processes = len(current_process.children(recursive=True))
        assert threads == starting_threads
        assert processes == starting_processes
def __init__(
    self,
    dataset: Union[IterableDataset, Dataset],
    split_batch_fn: Callable[
        [Tuple[Any, ...]], Tuple[ObservationType, ActionType]
    ] = None,
    observation_space: gym.Space = None,
    action_space: gym.Space = None,
    reward_space: gym.Space = None,
    n_classes: int = None,
    pretend_to_be_active: bool = False,
    strict: bool = False,
    **kwargs,
):
    """Creates the DataLoader/Environment for the given dataset.

    Parameters
    ----------
    dataset : Union[IterableDataset, Dataset]
        The dataset to iterate on. Should ideally be indexable (a Map-style
        dataset).

    split_batch_fn : Callable[ [Tuple[Any, ...]], Tuple[ObservationType, ActionType] ], optional
        A function to call on each item in the dataset in order to split it
        into Observations and Rewards, by default None, in which case we
        assume that the dataset items are tuples of length 2.

    observation_space : gym.Space, optional
        The single (non-batched) observation space. Default to `None`, in
        which case this will try to infer the shape of the space using the
        first item in the dataset.

    action_space : gym.Space, optional
        The non-batched action space. Defaults to None, in which case the
        `n_classes` argument must be passed, and the action space is assumed
        to be discrete (i.e. that the loader is for a classification dataset).

    reward_space : gym.Space, optional
        The non-batched reward (label) space. Defaults to `None`, in which
        case it will be the same as the action space (as is the case in
        classification).

    n_classes : int, optional
        Number of classes in the dataset. Used in case `action_space` isn't
        passed. Defaults to `None`.

    pretend_to_be_active : bool, optional
        Whether to withhold the rewards (labels) from the batches when being
        iterated on like the usual dataloader, and to only give them back
        after an action is received through the 'send' method. False by
        default, in which case this behaves exactly as a normal dataloader
        when being iterated on.

        When False, the batches yielded by this dataloader will be of the form
        `Tuple[Observations, Rewards]` (as usual in SL).
        However, when set to True, the batches will be
        `Tuple[Observations, None]`! Rewards will then be returned by the
        environment when an action is passed to the Send method.

    strict : bool, optional
        Whether to enforce that an action is sent for each batch before the
        next one can be fetched, by default False.

    # Examples:
    ```python
    train_env = PassiveEnvironment(MNIST("data"), batch_size=32, num_classes=10)

    # The usual Dataloader-style:
    for x, y in train_env:
        # train as usual
        (...)

    # OpenAI Gym style:
    for episode in range(5):
        # NOTE: "episode" in RL is an "epoch" in SL:
        obs = train_env.reset()
        done = False
        while not done:
            actions = train_env.action_space.sample()
            obs, rewards, done, info = train_env.step(actions)
    ```
    """
    super().__init__(dataset=dataset, **kwargs)
    self.split_batch_fn = split_batch_fn

    # TODO: When the spaces aren't passed explicitly, assumes a classification dataset.
    if not observation_space:
        # NOTE: Assuming min/max of 0 and 1 respectively, but could actually use
        # min_max of the dataset samples too.
        first_item = self.dataset[0]
        if isinstance(first_item, tuple):
            x, *_ = first_item
        else:
            # Dataset items may also be bare arrays/tensors (no labels).
            assert isinstance(first_item, (np.ndarray, Tensor))
            x = first_item
        observation_space = Image(0.0, 1.0, x.shape)
    if not action_space:
        assert n_classes, "must pass either `action_space`, or `n_classes` for now"
        action_space = spaces.Discrete(n_classes)
    elif isinstance(action_space, spaces.Discrete):
        # Recover n_classes from an explicitly-passed discrete action space.
        n_classes = action_space.n
    if not reward_space:
        # Assuming a classification dataset by default:
        # (action space = reward space = Discrete(n_classes))
        reward_space = action_space

    assert observation_space
    assert action_space
    assert reward_space

    # Keep the single-sample (non-batched) spaces around, then batch the
    # public-facing spaces when a batch size is set.
    self.single_observation_space: Space = observation_space
    self.single_action_space: Space = action_space
    self.single_reward_space: Space = reward_space

    if self.batch_size:
        observation_space = batch_space(observation_space, self.batch_size)
        action_space = batch_space(action_space, self.batch_size)
        reward_space = batch_space(reward_space, self.batch_size)

    self.observation_space: gym.Space = add_tensor_support(observation_space)
    self.action_space: gym.Space = add_tensor_support(action_space)
    self.reward_space: gym.Space = add_tensor_support(reward_space)

    self.pretend_to_be_active = pretend_to_be_active
    self._strict = strict
    # Queue of withheld rewards (bounded to avoid unbounded growth).
    self._reward_queue = deque(maxlen=10)

    self.n_classes: Optional[int] = n_classes
    self._iterator: Optional[_BaseDataLoaderIter] = None
    # NOTE: These here are never processed with self.observation or self.reward.
    self._previous_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._current_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._next_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._done: Optional[bool] = None
    self._closed: bool = False

    self._action: Optional[ActionType] = None

    # from gym.envs.classic_control.rendering import SimpleImageViewer
    self.viewer = None
from ..passive_environment import Actions, PassiveEnvironment, Rewards from ..passive_setting import PassiveSetting from .class_incremental_results import ClassIncrementalResults from .measure_performance_wrapper import MeasureSLPerformanceWrapper logger = get_logger(__file__) # NOTE: This dict reflects the observation space of the different datasets # *BEFORE* any transforms are applied. The resulting property on the Setting is # based on this 'base' observation space, passed through the transforms. # TODO: Make it possible to automatically add tensor support if the dtype passed to a # gym space is a `torch.dtype`. tensor_space = add_tensor_support base_observation_spaces: Dict[str, Space] = { "mnist": tensor_space(Image(0, 1, shape=(1, 28, 28))), "fashionmnist": tensor_space(Image(0, 1, shape=(1, 28, 28))), "kmnist": tensor_space(Image(0, 1, shape=(1, 28, 28))), "emnist": tensor_space(Image(0, 1, shape=(1, 28, 28))), "qmnist": tensor_space(Image(0, 1, shape=(1, 28, 28))), "mnistfellowship": tensor_space(Image(0, 1, shape=(1, 28, 28))), # TODO: Determine the true bounds on the image values in cifar10. # Appears to be ~= [-2.5, 2.5] "cifar10": tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), "cifar100": tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), "cifarfellowship": tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), "imagenet100": tensor_space(Image(0, 1, shape=(224, 224, 3))), "imagenet1000": tensor_space(Image(0, 1, shape=(224, 224, 3))), "core50": tensor_space(Image(0, 1, shape=(224, 224, 3))), "core50-v2-79": tensor_space(Image(0, 1, shape=(224, 224, 3))), "core50-v2-196": tensor_space(Image(0, 1, shape=(224, 224, 3))),
from sequoia.common.gym_wrappers.batch_env.tile_images import tile_images from .objects import ( Actions, ActionType, Observations, ObservationType, Rewards, RewardType, ) logger = get_logger(__file__) base_observation_spaces: Dict[str, Space] = { dataset_class.__name__.lower(): space for dataset_class, space in { MNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), FashionMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), KMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), EMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), QMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), MNISTFellowship: tensor_space(Image(0, 1, shape=(1, 28, 28))), # TODO: Determine the true bounds on the image values in cifar10. # Appears to be ~= [-2.5, 2.5] CIFAR10: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), CIFAR100: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), CIFARFellowship: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), ImageNet100: tensor_space(Image(0, 1, shape=(224, 224, 3))), ImageNet1000: tensor_space(Image(0, 1, shape=(224, 224, 3))), Core50: tensor_space(Image(0, 1, shape=(224, 224, 3))), Core50v2_79: tensor_space(Image(0, 1, shape=(224, 224, 3))),
def test_monsterkong(task_labels_at_test_time: bool, state: bool):
    """Checks that the MonsterKong env works fine with pixel and state input,
    with and without task labels at test time.

    Runs a DummyMethod through the whole Setting and checks the sequence of
    task switches it observed (5 train phases, each followed by 5 test tasks).
    """
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        observe_state_directly=state,
        nb_tasks=5,
        steps_per_task=100,
        test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
        task_labels_at_test_time=task_labels_at_test_time,
        max_episode_steps=10,
    )
    if state:
        # State-based monsterkong: We observe a flattened version of the game state
        # (20 x 20 grid + player cell and goal cell, IIRC.)
        assert setting.observation_space.x == spaces.Box(
            0, 292, (402, ), np.int16)
    else:
        # Pixel-based monsterkong: raw uint8 frames.
        assert setting.observation_space.x == Image(0, 255, (64, 64, 3),
                                                    np.uint8)
    if task_labels_at_test_time:
        assert setting.observation_space.task_labels == spaces.Discrete(5)
    else:
        assert setting.observation_space.task_labels == Sparse(
            spaces.Discrete(5), sparsity=0.0)
    # 5 tasks x 100 test steps each.
    assert setting.test_steps == 500
    with setting.train_dataloader() as env:
        obs = env.reset()
        assert obs in setting.observation_space
    method = DummyMethod()
    _ = setting.apply(method)
    # 5 train task switches, each followed by 5 test-task switches.
    assert method.n_task_switches == 30
    if task_labels_at_test_time:
        assert method.received_task_ids == [
            0, *list(range(5)),
            1, *list(range(5)),
            2, *list(range(5)),
            3, *list(range(5)),
            4, *list(range(5)),
        ]
    else:
        # Without test-time task labels, test switches report `None`.
        assert method.received_task_ids == [
            0, *[None for _ in range(5)],
            1, *[None for _ in range(5)],
            2, *[None for _ in range(5)],
            3, *[None for _ in range(5)],
            4, *[None for _ in range(5)],
        ]
    assert method.received_while_training == [
        True, *[False for _ in range(5)],
        True, *[False for _ in range(5)],
        True, *[False for _ in range(5)],
        True, *[False for _ in range(5)],
        True, *[False for _ in range(5)],
    ]
# "rotatedmnist": (28, 28, 1), "core50": (3, 224, 224), "core50-v2-79": (3, 224, 224), "core50-v2-196": (3, 224, 224), "core50-v2-391": (3, 224, 224), "synbols": (3, 224, 224), } from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support # NOTE: This dict reflects the observation space of the different datasets # *BEFORE* any transforms are applied. The resulting property on the Setting is # based on this 'base' observation space, passed through the transforms. base_observation_spaces: Dict[str, Space] = { dataset_name: add_tensor_support(Image(0, 1, image_shape, np.float32)) for dataset_name, image_shape in { "mnist": (1, 28, 28), "fashionmnist": (1, 28, 28), "kmnist": (28, 28, 1), "emnist": (28, 28, 1), "qmnist": (28, 28, 1), "mnistfellowship": (28, 28, 1), "cifar10": (32, 32, 3), "cifar100": (32, 32, 3), "cifarfellowship": (32, 32, 3), "imagenet100": (224, 224, 3), "imagenet1000": (224, 224, 3), # "permutedmnist": (28, 28, 1), # "rotatedmnist": (28, 28, 1), "core50": (224, 224, 3),
def test_monsterkong_pixels(task_labels_at_test_time: bool):
    """Checks that the MonsterKong env works fine with pixel input, with and
    without task labels at test time.

    Same structure as the state/pixel variant, but pinned to pixel observations
    (`observe_state_directly=False`).
    """
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        observe_state_directly=False,
        nb_tasks=5,
        steps_per_task=100,
        test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
        task_labels_at_test_time=task_labels_at_test_time,
        max_episode_steps=10,
    )
    # 5 tasks x 100 test steps each.
    assert setting.test_steps == 500
    # Pixel-based monsterkong: raw uint8 frames.
    assert setting.observation_space.x == Image(0, 255, (64, 64, 3), np.uint8)
    with setting.train_dataloader() as env:
        obs = env.reset()
        assert obs in setting.observation_space
    method = DummyMethod()
    results = setting.apply(method)
    # 5 train task switches, each followed by 5 test-task switches.
    assert method.n_task_switches == 30
    if task_labels_at_test_time:
        assert method.received_task_ids == [
            0, *list(range(5)),
            1, *list(range(5)),
            2, *list(range(5)),
            3, *list(range(5)),
            4, *list(range(5)),
        ]
    else:
        # Without test-time task labels, test switches report `None`.
        assert method.received_task_ids == [
            0, *[None for _ in range(5)],
            1, *[None for _ in range(5)],
            2, *[None for _ in range(5)],
            3, *[None for _ in range(5)],
            4, *[None for _ in range(5)],
        ]
    assert method.received_while_training == [
        True, *[False for _ in range(5)],
        True, *[False for _ in range(5)],
        True, *[False for _ in range(5)],
        True, *[False for _ in range(5)],
        True, *[False for _ in range(5)],
    ]