Example #1
class SLSetting(Setting[PassiveEnvironment[ObservationType, ActionType,
                                           RewardType]]):
    """Supervised Learning Setting.
    
    Core assumptions:
    - Current actions have no influence on future observations.
    - The environment gives back "dense feedback": the 'reward' associated with
      all possible actions at each step, rather than only the reward for the
      chosen action.

    For example, supervised learning is a Passive setting, since predicting a
    label has no effect on the reward you're given (the label) or on the next
    samples you observe.
    """
    @dataclass(frozen=True)
    class Observations(Setting.Observations):
        x: Tensor

    @dataclass(frozen=True)
    class Actions(Setting.Actions):
        pass

    @dataclass(frozen=True)
    class Rewards(Setting.Rewards):
        pass

    Environment: ClassVar[Type[PassiveEnvironment]] = PassiveEnvironment

    # TODO: rename/remove this, as it isn't used, and there could be some
    # confusion with the available_datasets in task-incremental and iid.
    # Also, since those are already LightningDataModules, what should we do?
    available_datasets: ClassVar[Dict[str, Type[LightningDataModule]]] = {
        # "mnist": MNISTDataModule,
        # "fashion_mnist": FashionMNISTDataModule,
        # "cifar10": CIFAR10DataModule,
        # "imagenet": ImagenetDataModule,
    }
    # Which setup / dataset to use.
    # The setups/dataset are implemented as `LightningDataModule`s.
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # Transforms to be applied to the observations of the train/valid/test
    # environments.
    transforms: List[Transforms] = list_field()

    # Transforms to be applied to the training datasets.
    train_transforms: List[Transforms] = list_field(Transforms.to_tensor,
                                                    Transforms.three_channels)
    # Transforms to be applied to the validation datasets.
    val_transforms: List[Transforms] = list_field(Transforms.to_tensor,
                                                  Transforms.three_channels)
    # Transforms to be applied to the testing datasets.
    test_transforms: List[Transforms] = list_field(Transforms.to_tensor,
                                                   Transforms.three_channels)
    # Whether to drop the last batch (during training). Useful when using
    # batchnorm, to avoid an error when the last batch has a batch_size of 1.
    drop_last: bool = False
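
In a passive setting, "dense feedback" means the true label (the reward) is
given at every step, regardless of the action taken. A minimal, self-contained
sketch of that contract (plain PyTorch, not Sequoia's actual environment API):

import torch
from torch import Tensor, nn

def passive_step(model: nn.Module, x: Tensor, y: Tensor) -> Tensor:
    """One 'step': the prediction (action) has no effect on the label (reward)."""
    logits = model(x)  # the "action": predicted class scores
    # The "reward" (true label) is given whatever the prediction was:
    return nn.functional.cross_entropy(logits, y)

model = nn.Linear(784, 10)
x, y = torch.randn(32, 784), torch.randint(0, 10, (32,))
loss = passive_step(model, x, y)  # the next batch is unaffected by this prediction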
Example #2
    class HParams(ClassificationHead.HParams):
        hidden_layers: int = 0
        hidden_neurons: List[int] = list_field()
        # The discount factor for the Return term.
        gamma: float = 0.99

        # The maximum length of the buffer that will hold the most recent
        # states/actions/rewards of the current episode.
        max_episode_window_length: int = 1000

        # Minimum number of episodes that need to be completed in each env
        # before we update the parameters of the output head.
        min_episodes_before_update: int = 1

        # TODO: Add this mechanism, so that this method could work even when
        # episodes are very long.
        max_steps_between_updates: Optional[int] = None

        # NOTE: Here we have two options:
        # 1- `True`: sum up all the losses and do one larger backward pass,
        # with `retain_graph=False`, or
        # 2- `False`: Perform multiple little backward passes, one for each
        # end-of-episode in a single env, with `retain_graph=True`.
        # Option 1 is probably more performant, as it only requires unrolling
        # the graph once, but it uses more memory to store all the
        # intermediate graphs.
        accumulate_losses_before_backward: bool = flag(True)
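
The two options in the NOTE above differ only in how the backward passes are
scheduled. A minimal sketch of both (generic PyTorch; the episode losses here
are placeholders, not the actual output-head loss):

import torch

params = torch.nn.Parameter(torch.randn(4))

def episode_losses():
    hidden = params * 2  # part of the graph shared between the episode losses
    return [(hidden * i).sum() for i in (1, 2, 3)]

# Option 1: accumulate_losses_before_backward=True
total_loss = sum(episode_losses())
total_loss.backward()  # one big backward pass; the graph is freed afterwards

params.grad = None

# Option 2: accumulate_losses_before_backward=False
losses = episode_losses()
for loss in losses[:-1]:
    loss.backward(retain_graph=True)  # keep the shared graph alive
losses[-1].backward()  # the last pass can free the graph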
Example #3
class RelabelTransform(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]):
    """ Transform that puts labels back into the [0, n_classes_per_task] range.
    
    For instance, if it's given a bunch of images that have labels [2, 3, 2]
    and the `task_classes = [2, 3]`, then the new labels will be
    `[0, 1, 0]`.
    
    Note that the order in `task_classes` is perserved. For instance, in the
    above example, if `task_classes = [3, 2]`, then the new labels would be
    `[1, 0, 1]`.
    
    IMPORTANT: This transform needs to be applied BEFORE ReorderTensor or
    SplitBatch, because it expects the batch to be (x, y, t) order
    """
    task_classes: List[int] = list_field()

    def __call__(self, batch: Tuple[Tensor, ...]):
        assert isinstance(batch, (list, tuple)), batch
        if len(batch) == 1:
            # Nothing to relabel.
            return batch
        # Works for both (x, y) and (x, y, t) batches:
        x, y, *task_labels = batch

        # if y.max() == len(self.task_classes):
        #     # No need to relabel this batch.
        #     # @lebrice: Can we really skip relabeling in this case?
        #     return batch

        new_y = relabel(y, task_classes=self.task_classes)
        return (x, new_y, *task_labels)
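
The `relabel` helper used above isn't shown; a minimal implementation that is
consistent with the docstring (hypothetical, for illustration) maps each label
to its index within `task_classes`:

import torch
from torch import Tensor
from typing import List

def relabel(y: Tensor, task_classes: List[int]) -> Tensor:
    """Map each label in `y` to its index within `task_classes`."""
    new_y = torch.zeros_like(y)
    for new_label, old_label in enumerate(task_classes):
        new_y[y == old_label] = new_label
    return new_y

y = torch.tensor([2, 3, 2])
assert relabel(y, task_classes=[2, 3]).tolist() == [0, 1, 0]
assert relabel(y, task_classes=[3, 2]).tolist() == [1, 0, 1]  # order preserved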
Example #4
class PassiveSetting(Setting[PassiveEnvironment[ObservationType, ActionType,
                                                RewardType]]):
    """Setting where actions have no influence on future observations.

    For example, supervised learning is a Passive setting, since predicting a
    label has no effect on the reward you're given (the label) or on the next
    samples you observe.
    """

    # @dataclass(frozen=True)
    # class Observations(Setting.Observations):
    #     pass

    # @dataclass(frozen=True)
    # class Actions(Setting.Actions):
    #     pass

    # @dataclass(frozen=True)
    # class Rewards(Setting.Rewards):
    #     pass

    # TODO: rename/remove this, as it isn't used, and there could be some
    # confusion with the available_datasets in task-incremental and iid.
    # Also, since those are already LightningDataModules, what should we do?
    available_datasets: ClassVar[Dict[str, Type[LightningDataModule]]] = {
        # "mnist": MNISTDataModule,
        # "fashion_mnist": FashionMNISTDataModule,
        # "cifar10": CIFAR10DataModule,
        # "imagenet": ImagenetDataModule,
    }
    # Which setup / dataset to use.
    # The setups/dataset are implemented as `LightningDataModule`s.
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # Transforms to be applied to the observations of the train/valid/test
    # environments.
    transforms: List[Transforms] = list_field()

    # Transforms to be applied to the training datasets.
    train_transforms: List[Transforms] = list_field(Transforms.to_tensor,
                                                    Transforms.three_channels)
    # Transforms to be applied to the validation datasets.
    val_transforms: List[Transforms] = list_field(Transforms.to_tensor,
                                                  Transforms.three_channels)
    # Transforms to be applied to the testing datasets.
    test_transforms: List[Transforms] = list_field(Transforms.to_tensor,
                                                   Transforms.three_channels)
Example #5
class Task(Serializable):
    """ Dataclass that represents a task.

    TODO (@lebrice): This isn't being used anymore, but we could probably
    use it / add it to the Continuum package, if it doesn't already have something
    like it.
    TODO: Maybe this could also specify from which dataset(s) it is sampled.
    """
    # The index of this task (the order in which it was encountered)
    index: int = field(default=-1, repr=False)
    # All the unique classes present within this task. (order matters)
    classes: List[int] = list_field()
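
A quick usage sketch (assuming `Serializable` comes from the simple-parsing
helpers, as the `list_field`/`choice` helpers above suggest):

task = Task(index=0, classes=[3, 1, 4])
assert task.classes == [3, 1, 4]  # order matters, e.g. for relabeling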
Example #6
class ClassIncrementalSetting(PassiveSetting, IncrementalSetting):
    """Supervised Setting where the data is a sequence of 'tasks'.

    This class is basically the supervised version of an IncrementalSetting.

    The current task can be set through the `current_task_id` attribute.
    """

    Results: ClassVar[Type[Results]] = ClassIncrementalResults

    # NOTE: Commenting out PassiveSetting.Observations as it is the same class
    # as Setting.Observations, and we want a consistent method resolution order.
    @dataclass(frozen=True)
    class Observations(  #PassiveSetting.Observations,
            IncrementalSetting.Observations):
        """ Incremental Observations, in a supervised context. """

        pass

    # @dataclass(frozen=True)
    # class Actions(PassiveSetting.Actions,
    #               IncrementalSetting.Actions):
    #     """Incremental Actions, in a supervised (passive) context."""
    #     pass

    # @dataclass(frozen=True)
    # class Rewards(PassiveSetting.Rewards,
    #               IncrementalSetting.Rewards):
    #     """Incremental Rewards, in a supervised context."""
    #     pass

    # Class variable holding a dict of the names and types of all available
    # datasets.
    # TODO: Issue #43: Support other datasets than just classification
    available_datasets: ClassVar[Dict[str, Type[_ContinuumDataset]]] = {
        c.__name__.lower(): c
        for c in [
            CIFARFellowship,
            MNISTFellowship,
            ImageNet100,
            ImageNet1000,
            CIFAR10,
            CIFAR100,
            EMNIST,
            KMNIST,
            MNIST,
            QMNIST,
            FashionMNIST,
            Synbols,
        ]
        # "synbols": Synbols,
        # "synbols_font": partial(Synbols, task="fonts"),
    }
    # A continual dataset to use. (Should be taken from the continuum package).
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # Transformations to use. See the Transforms enum for the available values.
    transforms: List[Transforms] = list_field(
        Transforms.to_tensor,
        # BUG: The input_shape given to the Model doesn't have the right number
        # of channels, even if we 'fixed' them here. However the images are fine
        # after.
        Transforms.three_channels,
        Transforms.channels_first_if_needed,
    )

    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes.
    increment: Union[int, List[int]] = list_field(2,
                                                  type=int,
                                                  nargs="*",
                                                  alias="n_classes_per_task")
    # The number of tasks in the scenario.
    # If zero, defaults to the number of classes divided by the increment.
    nb_tasks: int = 0
    # A different task size applied only for the first task.
    # Deactivated if `increment` is a list.
    initial_increment: int = 0
    # An optional custom class order, used for NC.
    class_order: Optional[List[int]] = None
    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes (defaults to the value of
    # `increment`).
    test_increment: Optional[Union[List[int], int]] = None
    # A different task size applied only for the first test task.
    # Deactivated if `test_increment` is a list. Defaults to the
    # value of `initial_increment`.
    test_initial_increment: Optional[int] = None
    # An optional custom class order for testing, used for NC.
    # Defaults to the value of `class_order`.
    test_class_order: Optional[List[int]] = None

    # TODO: Need to put num_workers in only one place.
    batch_size: int = field(default=32, cmd=False)
    num_workers: int = field(default=4, cmd=False)

    # Whether or not to relabel the images to be within the [0, n_classes_per_task)
    # range. False by default in the Class-Incremental Setting, but set to True
    # in the Domain-Incremental Setting.
    relabel: bool = False

    def __post_init__(self):
        """Initializes the fields of the Setting (and LightningDataModule),
        including the transforms, shapes, etc.
        """
        if isinstance(self.increment, list) and len(self.increment) == 1:
            # This can happen when parsing a list from the command-line.
            self.increment = self.increment[0]

        base_reward_space = reward_spaces[self.dataset]
        # action space = reward space by default
        base_action_space = base_reward_space

        if isinstance(base_action_space, spaces.Discrete):
            # Classification dataset

            self.num_classes = base_action_space.n
            # Set the number of tasks depending on the increment, and vice-versa.
            # (as only one of the two should be used).
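            # Example: for MNIST (10 classes) with increment=2, nb_tasks
            # becomes 5; conversely, with nb_tasks=5, increment becomes 2.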
            if self.nb_tasks == 0:
                self.nb_tasks = self.num_classes // self.increment
            else:
                self.increment = self.num_classes // self.nb_tasks
        else:
            raise NotImplementedError("TODO: (issue #43)")

        if not self.class_order:
            self.class_order = list(range(self.num_classes))

        # Test values default to the same as train.
        self.test_increment = self.test_increment or self.increment
        self.test_initial_increment = self.test_initial_increment or self.test_increment
        self.test_class_order = self.test_class_order or self.class_order

        # TODO: For now we assume a fixed, equal number of classes per task, for
        # sake of simplicity. We could take out this assumption, but it might
        # make things a bit more complicated.
        assert isinstance(self.increment, int)
        assert isinstance(self.test_increment, int)

        self.n_classes_per_task: int = self.increment
        action_space = spaces.Discrete(self.n_classes_per_task)
        reward_space = spaces.Discrete(self.n_classes_per_task)

        super().__post_init__(
            # observation_space=observation_space,
            action_space=action_space,
            reward_space=reward_space,  # the labels have shape (1,) always.
        )
        self.train_datasets: List[_ContinuumDataset] = []
        self.val_datasets: List[_ContinuumDataset] = []
        self.test_datasets: List[_ContinuumDataset] = []

        # This will be set by the Experiment, or passed to the `apply` method.
        # TODO: This could be a bit cleaner.
        self.config: Config
        # Default path to which the datasets will be downloaded.
        self.data_dir: Optional[Path] = None

        self.train_env: PassiveEnvironment = None  # type: ignore
        self.val_env: PassiveEnvironment = None  # type: ignore
        self.test_env: PassiveEnvironment = None  # type: ignore

    @property
    def observation_space(self) -> NamedTupleSpace:
        """ The un-batched observation space, based on the choice of dataset and
        the transforms at `self.transforms` (which apply to the train/valid/test
        environments).

        The returned space is a NamedTupleSpace, with the following properties:
        - `x`: observation space (e.g. `Image` space)
        - `task_labels`: Union[Discrete, Sparse[Discrete]]
           The task labels for each sample. When task labels are not available,
           the task labels space is Sparse, and entries will be `None`.
        """
        x_space = base_observation_spaces[self.dataset]
        if not self.transforms:
            # NOTE: When we don't pass any transforms, continuum scenarios still
            # at least use 'to_tensor'.
            x_space = Transforms.to_tensor(x_space)

        # apply the transforms to the observation space.
        for transform in self.transforms:
            x_space = transform(x_space)
        x_space = add_tensor_support(x_space)

        task_label_space = spaces.Discrete(self.nb_tasks)
        if not self.task_labels_at_train_time:
            task_label_space = Sparse(task_label_space, 1.0)
        task_label_space = add_tensor_support(task_label_space)

        return NamedTupleSpace(
            x=x_space,
            task_labels=task_label_space,
            dtype=self.Observations,
        )

    @property
    def action_space(self) -> spaces.Discrete:
        """ Action space for this setting. """
        if self.relabel:
            return spaces.Discrete(self.n_classes_per_task)
        return spaces.Discrete(self.num_classes)

        # TODO: IDEA: Have the action space only reflect the number of 'current'
        # classes, in order to create a "true" class-incremental learning setting:
        #
        # n_classes_seen_so_far = 0
        # for task_id in range(self.current_task_id):
        #     n_classes_seen_so_far += self.num_classes_in_task(task_id)
        # return spaces.Discrete(n_classes_seen_so_far)

    @property
    def reward_space(self) -> spaces.Discrete:
        return self.action_space

    def apply(self,
              method: Method,
              config: Config = None) -> ClassIncrementalResults:
        """Apply the given method on this setting to producing some results."""
        # TODO: It still isn't super clear what should be in charge of creating
        # the config, and how to create it, when it isn't passed explicitly.
        self.config: Config
        if config is not None:
            self.config = config
            logger.debug(f"Using Config {self.config}")
        elif isinstance(getattr(method, "config", None), Config):
            # If the Method has a `config` attribute that is a Config, use that.
            self.config = method.config
            logger.debug(f"Using Config from the Method: {self.config}")
        else:
            logger.debug("Parsing the Config from the command-line.")
            self.config = Config.from_args(self._argv, strict=False)
            logger.debug(f"Resulting Config: {self.config}")

        method.configure(setting=self)

        # Run the main loop (which is defined in IncrementalSetting).
        results: ClassIncrementalResults = super().main_loop(method)
        logger.info(results.summary())
        method.receive_results(self, results=results)
        return results

    def prepare_data(self, data_dir: Path = None, **kwargs):
        self.config = self.config or Config.from_args(self._argv, strict=False)

        # if self.batch_size is None:
        #     logger.warning(UserWarning(
        #         f"Using the default batch size of 32. (You can set the "
        #         f"batch size by passing a value to the Setting constructor, or "
        #         f"by setting the attribute inside your 'configure' method) "
        #     ))
        #     self.batch_size = 32

        data_dir = data_dir or self.data_dir or self.config.data_dir
        self.make_dataset(data_dir, download=True)
        self.data_dir = data_dir
        super().prepare_data(**kwargs)

    def setup(self, stage: Optional[str] = None, *args, **kwargs):
        """ Creates the datasets for each task.
        TODO: Figure out a way of setting data_dir elsewhere maybe?
        """
        assert self.config
        # self.config = self.config or Config.from_args(self._argv)
        logger.debug(
            f"data_dir: {self.data_dir}, setup args: {args} kwargs: {kwargs}")

        self.train_cl_dataset = self.make_dataset(self.data_dir,
                                                  download=False,
                                                  train=True)
        self.test_cl_dataset = self.make_dataset(self.data_dir,
                                                 download=False,
                                                 train=False)

        self.train_cl_loader: _BaseScenario = self.make_train_cl_loader(
            self.train_cl_dataset)
        self.test_cl_loader: _BaseScenario = self.make_test_cl_loader(
            self.test_cl_dataset)

        logger.info(f"Number of train tasks: {self.train_cl_loader.nb_tasks}.")
        logger.info(f"Number of test tasks: {self.train_cl_loader.nb_tasks}.")

        self.train_datasets.clear()
        self.val_datasets.clear()
        self.test_datasets.clear()

        for task_id, train_dataset in enumerate(self.train_cl_loader):
            train_dataset, val_dataset = split_train_val(
                train_dataset, val_split=self.val_fraction)
            self.train_datasets.append(train_dataset)
            self.val_datasets.append(val_dataset)

        for task_id, test_dataset in enumerate(self.test_cl_loader):
            self.test_datasets.append(test_dataset)

        super().setup(stage, *args, **kwargs)

        # TODO: Adding this temporarily just for the competition
        self.test_boundary_steps = [0] + list(
            itertools.accumulate(map(len, self.test_datasets)))[:-1]
        self.test_steps = sum(map(len, self.test_datasets))
        # self.test_steps = [0] + list(
        #     itertools.accumulate(map(len, self.test_datasets))
        # )[:-1]

    def get_train_dataset(self) -> Dataset:
        return self.train_datasets[self.current_task_id]

    def get_val_dataset(self) -> Dataset:
        return self.val_datasets[self.current_task_id]

    def get_test_dataset(self) -> Dataset:
        return ConcatDataset(self.test_datasets)

    def train_dataloader(self,
                         batch_size: int = None,
                         num_workers: int = None) -> PassiveEnvironment:
        """Returns a DataLoader for the train dataset of the current task. """
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_fit:
            self.setup("fit")

        if self.train_env:
            self.train_env.close()

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        dataset = self.get_train_dataset()
        # TODO: Add some kind of Wrapper around the dataset to make it
        # semi-supervised.
        env = PassiveEnvironment(
            dataset,
            split_batch_fn=self.split_batch_function(training=True),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            pin_memory=True,
            batch_size=batch_size,
            num_workers=num_workers,
            # Since the dataset only contains data from the current task(s), it's fine
            # to shuffle here. TODO: Double-check this.
            shuffle=True,
        )

        if self.config.render:
            # TODO: Add a callback wrapper that calls 'env.render' at each step?
            env = RenderEnvWrapper(env)

        if self.train_transforms:
            # TODO: Check that the transforms aren't already being applied in the
            # 'dataset' portion.
            env = TransformObservation(env, f=self.train_transforms)

        if self.monitor_training_performance:
            env = MeasureSLPerformanceWrapper(
                env,
                first_epoch_only=True,
                wandb_prefix=f"Train/Task {self.current_task_id}",
            )

        self.train_env = env
        return self.train_env

    def val_dataloader(self,
                       batch_size: int = None,
                       num_workers: int = None) -> PassiveEnvironment:
        """Returns a DataLoader for the validation dataset of the current task.
        """
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_fit:
            self.setup("fit")

        dataset = self.get_val_dataset()
        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers
        env = PassiveEnvironment(
            dataset,
            split_batch_fn=self.split_batch_function(training=True),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            pin_memory=True,
            batch_size=batch_size,
            num_workers=num_workers,
            # Since the dataset only contains data from the current task(s), it's fine
            # to shuffle here. TODO: Double-check this.
            shuffle=True,
        )
        if self.val_transforms:
            env = TransformObservation(env, f=self.val_transforms)

        if self.val_env:
            self.val_env.close()
            del self.val_env
        self.val_env = env
        return self.val_env

    def test_dataloader(
        self,
        batch_size: int = None,
        num_workers: int = None
    ) -> PassiveEnvironment["ClassIncrementalSetting.Observations", Actions,
                            Rewards]:
        """Returns a DataLoader for the test dataset of the current task.
        """
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_test:
            self.setup("test")

        # Testing this out, we're gonna have a "test schedule" like this to try
        # to imitate the MultiTaskEnvironment in RL.
        transition_steps = [0] + list(
            itertools.accumulate(map(len, self.test_datasets)))[:-1]
        # Join all the test datasets.
        dataset = self.get_test_dataset()

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        env = PassiveEnvironment(
            dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            split_batch_fn=self.split_batch_function(training=False),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            pretend_to_be_active=True,
            shuffle=False,
        )
        if self.test_transforms:
            env = TransformObservation(env, f=self.test_transforms)

        # NOTE: Two ways of removing the task labels: Either using a different
        # 'split_batch_fn' at train and test time, or by using this wrapper
        # which is also used in the RL side of the tree:
        # TODO: Maybe remove/simplify the 'split_batch_function'.
        from sequoia.settings.active.continual.wrappers import HideTaskLabelsWrapper

        if not self.task_labels_at_test_time:
            env = HideTaskLabelsWrapper(env)

        # FIXME: Creating a 'task schedule' for the TestEnvironment, mimicking
        # what's in the RL settings.
        # NOTE: `dict.fromkeys` would map every key to the *same* value, so we
        # build the {step: task_id} mapping explicitly instead.
        test_task_schedule = {
            step // (env.batch_size or 1): task_id
            for task_id, step in enumerate(transition_steps)
        }
        # TODO: Configure the 'monitoring' dir properly.
        test_dir = "results"
        test_loop_max_steps = len(dataset) // (env.batch_size or 1)
        # TODO: Fix this: iteration doesn't ever end for some reason.

        test_env = ClassIncrementalTestEnvironment(
            env,
            directory=test_dir,
            step_limit=test_loop_max_steps,
            task_schedule=test_task_schedule,
            force=True,
            config=self.config,
        )

        if self.test_env:
            self.test_env.close()
        self.test_env = test_env
        return self.test_env

    def split_batch_function(
        self, training: bool
    ) -> Callable[[Tuple[Tensor, ...]], Tuple[Observations, Rewards]]:
        """ Returns a callable that is used to split a batch into observations and rewards.
        """
        task_classes = {
            i: self.task_classes(i, train=training)
            for i in range(self.nb_tasks)
        }

        def split_batch(
                batch: Tuple[Tensor, ...]) -> Tuple[Observations, Rewards]:
            """Splits the batch into a tuple of Observations and Rewards.

            Parameters
            ----------
            batch : Tuple[Tensor, ...]
                A batch of data coming from the dataset.

            Returns
            -------
            Tuple[Observations, Rewards]
                A tuple of Observations and Rewards.
            """
            # In this context (class_incremental), we will always have 3 items per
            # batch, because we use the ClassIncremental scenario from Continuum.
            assert len(batch) == 3
            x, y, t = batch

            # Relabel y so it is always in [0, n_classes_per_task) for each task.
            if self.relabel:
                y = relabel(y, task_classes)

            if (training and not self.task_labels_at_train_time) or (
                    not training and not self.task_labels_at_test_time):
                # Remove the task labels if we're not currently allowed to have
                # them.
                # TODO: Using None might cause some issues. Maybe set -1 instead?
                t = None

            observations = self.Observations(x=x, task_labels=t)
            rewards = self.Rewards(y=y)

            return observations, rewards

        return split_batch
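
    # Example (illustrative sketch) of using the returned callable on a batch
    # coming from a Continuum ClassIncremental scenario:
    #
    #     split = setting.split_batch_function(training=True)
    #     observations, rewards = split((x, y, t))
    #     # observations.x is x, observations.task_labels is t (or None when
    #     # task labels aren't available), and rewards.y is y (relabeled into
    #     # [0, n_classes_per_task) when `self.relabel` is True).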

    def make_train_cl_loader(
            self, train_dataset: _ContinuumDataset) -> _BaseScenario:
        """ Creates a train ClassIncremental object from continuum. """
        return ClassIncremental(
            train_dataset,
            nb_tasks=self.nb_tasks,
            increment=self.increment,
            initial_increment=self.initial_increment,
            class_order=self.class_order,
            transformations=self.transforms,
        )

    def make_test_cl_loader(self,
                            test_dataset: _ContinuumDataset) -> _BaseScenario:
        """ Creates a test ClassIncremental object from continuum. """
        return ClassIncremental(
            test_dataset,
            nb_tasks=self.nb_tasks,
            increment=self.test_increment,
            initial_increment=self.test_initial_increment,
            class_order=self.test_class_order,
            transformations=self.transforms,
        )

    def make_dataset(self,
                     data_dir: Path,
                     download: bool = True,
                     train: bool = True,
                     **kwargs) -> _ContinuumDataset:
        # TODO: #7 Use this method here to fix the errors that happen when
        # trying to create every single dataset from continuum.
        data_dir = Path(data_dir)

        if not data_dir.exists():
            data_dir.mkdir(parents=True, exist_ok=True)

        if self.dataset in self.available_datasets:
            dataset_class = self.available_datasets[self.dataset]
            return dataset_class(data_path=data_dir,
                                 download=download,
                                 train=train,
                                 **kwargs)

        elif self.dataset in self.available_datasets.values():
            dataset_class = self.dataset
            return dataset_class(data_path=data_dir,
                                 download=download,
                                 train=train,
                                 **kwargs)

        elif isinstance(self.dataset, Dataset):
            logger.info(f"Using a custom dataset {self.dataset}")
            return self.dataset

        else:
            raise NotImplementedError

    # These methods below are used by the MultiHeadModel, mostly when
    # using a multihead model, to figure out how to relabel the batches, or how
    # many classes there are in the current task (since we support a different
    # number of classes per task).
    # TODO: Remove this? Since I'm simplifying to a fixed number of classes per
    # task for now...

    def num_classes_in_task(self, task_id: int,
                            train: bool) -> Union[int, List[int]]:
        """ Returns the number of classes in the given task. """
        increment = self.increment if train else self.test_increment
        if isinstance(increment, list):
            return increment[task_id]
        return increment

    def num_classes_in_current_task(self, train: bool = None) -> int:
        """ Returns the number of classes in the current task. """
        # TODO: It's ugly to have the 'method' tell us if we're currently in
        # train/eval/test, no? Maybe just make a method for each?
        return self.num_classes_in_task(self._current_task_id, train=train)

    def task_classes(self, task_id: int, train: bool) -> List[int]:
        """ Gives back the 'true' labels present in the given task. """
        start_index = sum(
            self.num_classes_in_task(i, train) for i in range(task_id))
        end_index = start_index + self.num_classes_in_task(task_id, train)
        if train:
            return self.class_order[start_index:end_index]
        else:
            return self.test_class_order[start_index:end_index]
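
    # Example (illustrative): with `class_order = list(range(10))` and a fixed
    # increment of 2, `task_classes(0, train=True)` -> [0, 1] and
    # `task_classes(3, train=True)` -> [6, 7].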

    def current_task_classes(self, train: bool) -> List[int]:
        """ Gives back the labels present in the current task. """
        return self.task_classes(self._current_task_id, train)

    def _check_environments(self):
        """ Do a quick check to make sure that the dataloaders give back the
        right observations / reward types.
        """
        for loader_method in [
                self.train_dataloader,
                self.val_dataloader,
                self.test_dataloader,
        ]:
            logger.debug(f"Checking loader method {loader_method.__name__}")
            env = loader_method(batch_size=5)
            obs = env.reset()
            assert isinstance(obs, self.Observations)
            # Convert the observation to numpy arrays, to make it easier to
            # check if the elements are in the spaces.
            obs = obs.numpy()
            # Take the first sample of the batch, to get per-sample tensors.
            first_obs = obs[:, 0]
            # TODO: Here we'd like to be able to check that the first observation
            # is inside the observation space, but we can't do that because the
            # task label might be None, and so that would make it fail.
            x, task_label = first_obs
            if task_label is None:
                assert x in self.observation_space[0]

            for i in range(5):
                actions = env.action_space.sample()
                observations, rewards, done, info = env.step(actions)
                assert isinstance(observations,
                                  self.Observations), type(observations)
                assert isinstance(rewards, self.Rewards), type(rewards)
                actions = env.action_space.sample()
                if done:
                    observations = env.reset()
            env.close()
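
Putting it together: a hedged end-to-end sketch (the import path and the
`MyMethod` class are assumptions; `apply` and the constructor fields are shown
above):

from sequoia.settings.passive.cl import ClassIncrementalSetting  # path assumed

setting = ClassIncrementalSetting(dataset="mnist", increment=2)  # 5 tasks, 2 classes each
method = MyMethod()  # any sequoia Method implementation
results = setting.apply(method)
print(results.summary())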
Example #7
class ContinualRLSetting(ActiveSetting, IncrementalSetting):
    """ Reinforcement Learning Setting where the environment changes over time.

    This is an Active setting which uses gym environments as sources of data.
    These environments' attributes could change over time following a task
    schedule. An example of this could be that the gravity increases over time
    in cartpole, making the task progressively harder as the agent interacts with
    the environment.
    """

    # The type of results returned by an RL experiment.
    Results: ClassVar[Type[Results]] = RLResults

    @dataclass(frozen=True)
    class Observations(IncrementalSetting.Observations):
        """ Observations in a continual RL Setting. """

        # Just as a reminder, these are the fields defined in the base classes:
        # x: Tensor
        # task_labels: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = None

        # The 'done' part of the 'step' method. We add this here in case a
        # method were to iterate on the environments in the dataloader-style so
        # they also have access to those (i.e. for the BaselineMethod).
        done: Optional[Sequence[bool]] = None
        # Same, for the 'info' portion of the result of 'step'.
        # TODO: If we add the 'task space' (with all the attributes, for instance
        # then add it to the observations using the `AddInfoToObservations`.
        # info: Optional[Sequence[Dict]] = None

    # Image transforms to use.
    transforms: List[Transforms] = list_field()

    # Class variable that holds the dict of available environments.
    available_datasets: ClassVar[Dict[str, str]] = {
        "cartpole": "CartPole-v0",
        "pendulum": "Pendulum-v0",
        "breakout": "Breakout-v0",
        # "duckietown": "Duckietown-straight_road-v0"
    }
    # TODO: Add breakout to 'available_datasets' only when atari_py is installed.

    # Which environment (a.k.a. "dataset") to learn on.
    # The dataset could be either a string (env id or a key from the
    # available_datasets dict), a gym.Env, or a callable that returns a single environment.
    # If self.dataset isn't one of those, an error will be raised.
    dataset: str = choice(available_datasets, default="cartpole")

    # The number of tasks. By default 1 for this setting.
    nb_tasks: int = field(1, alias=["n_tasks", "num_tasks"])

    # Max number of steps per task. (Also acts as the "length" of the training
    # and validation "Datasets")
    max_steps: int = 100_000
    # Maximum episodes per task.
    # TODO: Test that the limit on the number of episodes actually works.
    max_episodes: Optional[int] = None
    # Number of steps per task. When left unset and when `max_steps` is set,
    # takes the value of `max_steps` divided by `nb_tasks`.
    steps_per_task: Optional[int] = None
    # (WIP): Number of episodes per task.
    episodes_per_task: Optional[int] = None

    # Total number of steps in the test loop. (Also acts as the "length" of the testing
    # environment.)
    test_steps: int = 10_000
    # Number of steps per task in the test loop. When left unset and when `test_steps`
    # is set, takes the value of `test_steps` divided by `nb_tasks`.
    test_steps_per_task: Optional[int] = None

    # Standard deviation of the multiplicative Gaussian noise that is used to
    # create the values of the env attributes for each task.
    task_noise_std: float = 0.2

    # Whether the task boundaries are smooth or sudden.
    smooth_task_boundaries: bool = True

    # Whether to observe the state directly, rather than pixels. This can be
    # useful to debug environments like CartPole, for instance.
    observe_state_directly: bool = False

    # Path to a json file from which to read the train task schedule.
    train_task_schedule_path: Optional[Path] = None
    # Path to a json file from which to read the validation task schedule.
    valid_task_schedule_path: Optional[Path] = None
    # Path to a json file from which to read the test task schedule.
    test_task_schedule_path: Optional[Path] = None

    # Whether observations from the environments should include
    # the end-of-episode signal. Only really useful if your method will iterate
    # over the environments in the dataloader style
    # (as does the baseline method).
    add_done_to_observations: bool = False

    # The maximum number of steps per episode. When None, there is no limit.
    max_episode_steps: Optional[int] = None

    # NOTE: Added this `cmd=False` option to mark that we don't want to generate
    # any command-line arguments for these fields.
    train_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)
    valid_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)
    test_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)

    train_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)
    valid_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)
    test_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)

    batch_size: Optional[int] = field(default=None, cmd=False)
    num_workers: Optional[int] = field(default=None, cmd=False)

    def __post_init__(self, *args, **kwargs):
        super().__post_init__(*args, **kwargs)
        self._new_random_task_on_reset: bool = False

        # Post processing of the 'dataset' field:
        if self.dataset in self.available_datasets.keys():
            # the environment name was passed, rather than an id
            # (e.g. 'cartpole' -> 'CartPole-v0').
            self.dataset = self.available_datasets[self.dataset]

        elif self.dataset not in self.available_datasets.values():
            # The passed dataset is assumed to be an environment ID, but it
            # wasn't in the dict of available datasets! We issue a warning, but
            # proceed to let the user use whatever environment they want to.
            logger.warning(
                UserWarning(
                    f"The chosen dataset/environment ({self.dataset}) isn't in the "
                    f"available_datasets dict, so we can't garantee this will work!"
                )
            )

        if isinstance(self.dataset, gym.Env) and self.batch_size:
            raise RuntimeError(
                f"Batch size should be None when a gym.Env "
                f"object is passed as `dataset`."
            )
        if not isinstance(self.dataset, (str, gym.Env)) and not callable(self.dataset):
            raise RuntimeError(
                f"`dataset` must be either a string, a gym.Env, or a callable. "
                f"(got {self.dataset})"
            )

        # Set the number of tasks depending on the increment, and vice-versa.
        # (as only one of the two should be used).
        assert self.max_steps, "assuming this should always be set, for now."
        # TODO: Clean this up, not super clear what options take precedence on
        # which other options.

        # Load the task schedules from the corresponding files, if present.
        if self.train_task_schedule_path:
            self.train_task_schedule = self.load_task_schedule(
                self.train_task_schedule_path
            )

        if self.valid_task_schedule_path:
            self.valid_task_schedule = self.load_task_schedule(
                self.valid_task_schedule_path
            )

        if self.test_task_schedule_path:
            self.test_task_schedule = self.load_task_schedule(
                self.test_task_schedule_path
            )

        if self.train_task_schedule:
            if self.steps_per_task is not None:
                # If steps per task was passed, then we overwrite the keys of the
                # task schedule.
                self.train_task_schedule = {
                    i * self.steps_per_task: self.train_task_schedule[step]
                    for i, step in enumerate(sorted(self.train_task_schedule.keys()))
                }
            else:
                # A task schedule was passed: infer the number of tasks from it.
                change_steps = sorted(self.train_task_schedule.keys())
                assert 0 in change_steps, "Schedule needs a task at step 0."
                # TODO: @lebrice: I guess we have to assume that the interval
                # between steps is constant for now? Do we actually depend on this
                # being the case? I think steps_per_task is only really ever used
                # for creating the task schedule, which we already have in this
                # case.
                assert (
                    len(change_steps) >= 2
                ), "WIP: need a minimum of two tasks in the task schedule for now."
                self.steps_per_task = change_steps[1] - change_steps[0]
                # Double-check that this is the case.
                for i in range(len(change_steps) - 1):
                    if change_steps[i + 1] - change_steps[i] != self.steps_per_task:
                        raise NotImplementedError(
                            f"WIP: This might not work yet if the tasks aren't "
                            f"equally spaced out at a fixed interval."
                        )

            nb_tasks = len(self.train_task_schedule)
            if self.smooth_task_boundaries:
                # NOTE: When in a ContinualRLSetting with smooth task boundaries,
                # the last entry in the schedule represents the state of the env at
                # the end of the "task". When there are clear task boundaries (i.e.
                # when in 'Class'/Task-Incremental RL), the last entry is the start
                # of the last task.
                nb_tasks -= 1
            if self.nb_tasks != 1:
                if self.nb_tasks != nb_tasks:
                    raise RuntimeError(
                        f"Passed number of tasks {self.nb_tasks} doesn't match the "
                        f"number of tasks deduced from the task schedule ({nb_tasks})"
                    )
            self.nb_tasks = nb_tasks

            self.max_steps = max(self.train_task_schedule.keys())
            if not self.smooth_task_boundaries:
                # See above note about the last entry.
                self.max_steps += self.steps_per_task

        elif self.nb_tasks:
            if self.steps_per_task:
                self.max_steps = self.nb_tasks * self.steps_per_task
            elif self.max_steps:
                self.steps_per_task = self.max_steps // self.nb_tasks

        elif self.steps_per_task:
            if self.nb_tasks:
                self.max_steps = self.nb_tasks * self.steps_per_task
            elif self.max_steps:
                self.nb_tasks = self.max_steps // self.steps_per_task

        elif self.max_steps:
            if self.nb_tasks:
                self.steps_per_task = self.max_steps // self.nb_tasks
            elif self.steps_per_task:
                self.nb_tasks = self.max_steps // self.steps_per_task

        if not all([self.nb_tasks, self.max_steps, self.steps_per_task]):
            raise RuntimeError(
                f"You need to provide at least two of 'max_steps', "
                f"'nb_tasks', or 'steps_per_task'."
            )

        assert self.max_steps == self.nb_tasks * self.steps_per_task
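        # Example: max_steps=100_000 with nb_tasks=5 gives steps_per_task=20_000;
        # any two of the three determine the third.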

        if self.test_task_schedule:
            if 0 not in self.test_task_schedule:
                raise RuntimeError("Task schedules needs to include an initial task.")

            if self.test_steps_per_task is not None:
                # If steps per task was passed, then we overwrite the number of steps
                # for each task in the schedule to match.
                self.test_task_schedule = {
                    i * self.test_steps_per_task: self.test_task_schedule[step]
                    for i, step in enumerate(sorted(self.test_task_schedule.keys()))
                }

            change_steps = sorted(self.test_task_schedule.keys())
            assert 0 in change_steps, "Schedule needs to include task at step 0."

            nb_test_tasks = len(change_steps)
            if self.smooth_task_boundaries:
                nb_test_tasks -= 1
            assert (
                nb_test_tasks == self.nb_tasks
            ), "nb of tasks should be the same for train and test."

            self.test_steps_per_task = change_steps[1] - change_steps[0]
            for i in range(self.nb_tasks - 1):
                if change_steps[i + 1] - change_steps[i] != self.test_steps_per_task:
                    raise NotImplementedError(
                        "WIP: This might not work yet if the test tasks aren't "
                        "equally spaced out at a fixed interval."
                    )

            self.test_steps = max(change_steps)
            if not self.smooth_task_boundaries:
                # See above note about the last entry.
                self.test_steps += self.test_steps_per_task

        elif self.test_steps_per_task is None:
            # This is basically never the case, since the test_steps defaults to 10_000.
            assert (
                self.test_steps
            ), "need to set one of test_steps or test_steps_per_task"
            self.test_steps_per_task = self.test_steps // self.nb_tasks
        else:
            # FIXME: This is too complicated for what it is.
            # Check that `test_steps` is either the default value, or the right
            # value to use in this case.
            assert self.test_steps in {10_000, self.test_steps_per_task * self.nb_tasks}
            assert (
                self.test_steps_per_task
            ), "need to set one of test_steps or test_steps_per_task"
            self.test_steps = self.test_steps_per_task * self.nb_tasks

        assert self.test_steps // self.test_steps_per_task == self.nb_tasks

        if self.smooth_task_boundaries:
            # If we're operating in the 'Online/smooth task transitions' "regime",
            # then there is only one "task", and we don't have task labels.
            # TODO: HOWEVER, the task schedule could/should be able to have more
            # than one non-stationarity! This indicates a need for a distinction
            # between 'tasks' and 'non-stationarities' (changes in the env).
            self.known_task_boundaries_at_train_time = False
            self.known_task_boundaries_at_test_time = False
            self.task_labels_at_train_time = False
            self.task_labels_at_test_time = False
            # self.steps_per_task = self.max_steps

        # Task schedules for training / validation and testing.

        # Create a temporary environment so we can extract the spaces and create
        # the task schedules.
        with self._make_env(
            self.dataset, self._temp_wrappers(), self.observe_state_directly
        ) as temp_env:
            # FIXME: Replacing the observation space dtypes from their original
            # 'generated' NamedTuples to self.Observations. The alternative
            # would be to add another argument to the MultiTaskEnv wrapper, to
            # pass down a dtype to be set on its observation_space's `dtype`
            # attribute, which would be ugly.
            assert isinstance(temp_env.observation_space, NamedTupleSpace)
            temp_env.observation_space.dtype = self.Observations
            # Populate the task schedules created above.
            if not self.train_task_schedule:
                train_change_steps = list(range(0, self.max_steps, self.steps_per_task))
                if self.smooth_task_boundaries:
                    # Add a last 'task' at the end of the 'epoch', so that the
                    # env changes smoothly right until the end.
                    train_change_steps.append(self.max_steps)
                self.train_task_schedule = self.create_task_schedule(
                    temp_env, train_change_steps,
                )

            assert self.train_task_schedule is not None
            # The validation task schedule is the same as the one used in
            # training by default.
            if not self.valid_task_schedule:
                self.valid_task_schedule = deepcopy(self.train_task_schedule)

            if not self.test_task_schedule:
                # The test task schedule is by default the same as in validation
                # except that the interval between the tasks may be different,
                # depending on the value of `self.test_steps_per_task`.
                valid_steps = sorted(self.valid_task_schedule.keys())
                valid_tasks = [self.valid_task_schedule[step] for step in valid_steps]
                self.test_task_schedule = {
                    i * self.test_steps_per_task: deepcopy(task)
                    for i, task in enumerate(valid_tasks)
                }

            # Set the spaces using the temp env.
            self.observation_space = temp_env.observation_space
            self.action_space = temp_env.action_space
            self.reward_range = temp_env.reward_range
            self.reward_space = getattr(
                temp_env,
                "reward_space",
                spaces.Box(
                    low=self.reward_range[0], high=self.reward_range[1], shape=()
                ),
            )

        del temp_env

        self.train_env: gym.Env
        self.valid_env: gym.Env
        self.test_env: gym.Env

    def create_task_schedule(
        self, temp_env: MultiTaskEnvironment, change_steps: List[int]
    ) -> Dict[int, Dict]:
        """ Create the task schedule, which maps from a step to the changes that
        will occur in the environment when that step is reached.
        
        Uses the provided `temp_env` to generate the random tasks at the steps
        given in `change_steps` (a list of integers).

        Returns a dictionary mapping from integers (the steps) to the changes
        that will occur in the env at that step.

        TODO: IDEA: Instead of just setting env attributes, use the
        `methodcaller` or `attrsetter` from the `operator` built-in package,
        that way later when we want to add support for Meta-World, we can just
        use `partial(methodcaller("set_task"), task="new_task")(env)` or
        something like that (i.e. generalize from changing an attribute to
        applying a function on the env, which would allow calling methods in
        addition to setting attributes.)
        """
        task_schedule: Dict[int, Dict] = {}
        # Start with the default task (step 0) and then add a new task at
        # intervals of `self.steps_per_task`
        for task_step in change_steps:
            if task_step == 0:
                # Start with the default task, so that we can recover the 'iid'
                # case with standard env dynamics when there is only one task
                # and no non-stationarity.
                task_schedule[task_step] = temp_env.default_task
            else:
                task_schedule[task_step] = temp_env.random_task()

        return task_schedule
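
    # Example (illustrative): for CartPole with steps_per_task=10_000, the
    # resulting schedule might look like:
    #
    #     {
    #         0:      {"gravity": 9.8, "length": 0.5, ...},   # default task
    #         10_000: {"gravity": 11.2, "length": 0.46, ...}, # random task
    #         20_000: {"gravity": 8.3, "length": 0.57, ...},
    #     }
    #
    # where each inner dict holds the env attribute values to set at that step
    # (sampled with multiplicative Gaussian noise of std `task_noise_std`).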

    def apply(
        self, method: Method, config: Config = None
    ) -> "ContinualRLSetting.Results":
        """Apply the given method on this setting to producing some results. """
        # Use the supplied config, or parse one from the arguments that were
        # used to create `self`.
        self.config: Config
        if config is not None:
            self.config = config
            logger.debug(f"Using Config {self.config}")
        elif isinstance(getattr(method, "config", None), Config):
            self.config = method.config
            logger.debug(f"Using Config from the Method: {self.config}")
        else:
            logger.debug(f"Parsing the Config from the command-line.")
            self.config = Config.from_args(self._argv, strict=False)
            logger.debug(f"Resulting Config: {self.config}")

        # TODO: Test to make sure that this doesn't cause any other bugs with respect to
        # the display of stuff:
        # Call this method, which creates a virtual display if necessary.
        self.config.get_display()

        # TODO: Should we really overwrite the method's 'config' attribute here?
        if not getattr(method, "config", None):
            method.config = self.config

        # TODO: Remove `Setting.configure(method)` entirely, from everywhere,
        # and use the `prepare_data` or `setup` methods instead (since these
        # `configure` methods aren't using the `method` anyway.)
        method.configure(setting=self)

        # BUG: This won't work if the task schedule uses callables as the values
        # (as they aren't json-serializable.)
        if self._new_random_task_on_reset:
            logger.info(
                f"Train tasks: "
                + json.dumps(list(self.train_task_schedule.values()), indent="\t")
            )
        else:
            logger.info(
                f"Train task schedule:"
                + json.dumps(self.train_task_schedule, indent="\t")
            )
        if self.config.debug:
            logger.debug(
                f"Test task schedule:"
                + json.dumps(self.test_task_schedule, indent="\t")
            )

        # Run the Training loop (which is defined in IncrementalSetting).
        results = self.main_loop(method)

        logger.info("Results summary:")
        logger.info(results.to_log_dict())
        logger.info(results.summary())
        method.receive_results(self, results=results)
        return results

        # Run the Test loop (which is defined in IncrementalSetting).
        # results: RlResults = self.test_loop(method)

    def setup(self, stage: str = None) -> None:
        # Called before the start of each task during training, validation and
        # testing.
        super().setup(stage=stage)
        if stage in {"fit", None}:
            self.train_wrappers = self.create_train_wrappers()
            self.valid_wrappers = self.create_valid_wrappers()
        elif stage in {"test", None}:
            self.test_wrappers = self.create_test_wrappers()
    
    def prepare_data(self, *args, **kwargs) -> None:
        # We don't really download anything atm.
        if self.config is None:
            self.config = Config()
        super().prepare_data(*args, **kwargs)

    def train_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> ActiveEnvironment:
        """Create a training gym.Env/DataLoader for the current task.
        
        Parameters
        ----------
        batch_size : int, optional
            The batch size, which in this case is the number of environments to
            run in parallel. When `None`, the env won't be vectorized. Defaults
            to None.
        num_workers : int, optional
            The number of workers (processes) to use in the vectorized env. When
            None, the envs are run in sequence, which could be very slow. Only
            applies when `batch_size` is not None. Defaults to None.

        Returns
        -------
        GymDataLoader
            A (possibly vectorized) environment/dataloader for the current task.
        """
        if not self.has_prepared_data:
            self.prepare_data()
        # NOTE: We actually want to call setup every time, so we re-create the
        # wrappers for each task.
        # if not self.has_setup_fit:
        self.setup("fit")

        batch_size = batch_size or self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers
        env_factory = partial(
            self._make_env,
            base_env=self.dataset,
            wrappers=self.train_wrappers,
            observe_state_directly=self.observe_state_directly,
        )
        env_dataloader = self._make_env_dataloader(
            env_factory,
            batch_size=batch_size,
            num_workers=num_workers,
            max_steps=self.steps_per_task,
            max_episodes=self.episodes_per_task,
        )

        if self.monitor_training_performance:
            from sequoia.settings.passive.cl.measure_performance_wrapper import (
                MeasureRLPerformanceWrapper,
            )
            env_dataloader = MeasureRLPerformanceWrapper(
                env_dataloader, wandb_prefix=f"Train/Task {self.current_task_id}"
            )
        
        self.train_env = env_dataloader
        # BUG: There is a mismatch between the train env's observation space and the
        # shape of its observations.
        self.observation_space = self.train_env.observation_space

        return self.train_env

    def val_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> Environment:
        """Create a validation gym.Env/DataLoader for the current task.
        
        Parameters
        ----------
        batch_size : int, optional
            The batch size, which in this case is the number of environments to
            run in parallel. When `None`, the env won't be vectorized. Defaults
            to None.
        num_workers : int, optional
            The number of workers (processes) to use in the vectorized env. When
            None, the envs are run in sequence, which could be very slow. Only
            applies when `batch_size` is not None. Defaults to None.

        Returns
        -------
        GymDataLoader
            A (possibly vectorized) environment/dataloader for the current task.
        """
        if not self.has_prepared_data:
            self.prepare_data()
        self.setup("fit")

        env_factory = partial(
            self._make_env,
            base_env=self.dataset,
            wrappers=self.valid_wrappers,
            observe_state_directly=self.observe_state_directly,
        )
        env_dataloader = self._make_env_dataloader(
            env_factory,
            batch_size=batch_size or self.batch_size,
            num_workers=num_workers if num_workers is not None else self.num_workers,
            max_steps=self.steps_per_task,
            max_episodes=self.episodes_per_task,
        )
        self.val_env = env_dataloader
        return self.val_env

    def test_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> TestEnvironment:
        """Create the test 'dataloader/gym.Env' for all tasks.
        
        NOTE: This test environment isn't just for the current task: it actually
        contains the sequence of all tasks. This differs from the train and
        validation environments, since when task labels are available at train
        time, calling `train/valid_dataloader` returns the env for the current
        task only, and the `fit` method is called once per task.
        
        This environment is also different in that it is wrapped with a Monitor,
        which we might eventually use to save the results/gifs/logs of the
        testing runs.

        Parameters
        ----------
        batch_size : int, optional
            The batch size, which in this case is the number of environments to
            run in parallel. When `None`, the env won't be vectorized. Defaults
            to None.
        num_workers : int, optional
            The number of workers (processes) to use in the vectorized env. When
            None, the envs are run in sequence, which could be very slow. Only
            applies when `batch_size` is not None. Defaults to None.

        Returns
        -------
        TestEnvironment
            A testing environment which keeps track of the performance of the
            actor and accumulates logs/statistics that are used to eventually
            create the 'Result' object.
        """
        if not self.has_prepared_data:
            self.prepare_data()
        self.setup("test")
        # BUG: gym.wrappers.Monitor doesn't want to play nice when applied to
        # Vectorized env, it seems..
        # FIXME: Remove this when the Monitor class works correctly with
        # batched environments.
        batch_size = batch_size or self.batch_size
        if batch_size is not None:
            logger.warning(
                colorize(
                    f"WIP: Only a batch size of `None` (i.e., a single env) is "
                    f"supported for the test environments of RL Settings at the "
                    f"moment, because the Monitor class from gym doesn't work "
                    f"with VectorEnvs. (batch size was {batch_size})",
                    "yellow",
                )
            )
            batch_size = None

        num_workers = num_workers if num_workers is not None else self.num_workers
        env_factory = partial(
            self._make_env,
            base_env=self.dataset,
            wrappers=self.test_wrappers,
            observe_state_directly=self.observe_state_directly,
        )
        # TODO: Pass the max_steps argument to this `_make_env_dataloader` method,
        # rather than to a `step_limit` on the TestEnvironment.
        env_dataloader = self._make_env_dataloader(
            env_factory, batch_size=batch_size, num_workers=num_workers,
        )
        # TODO: We should probably change the max_steps depending on the
        # batch size of the env.
        test_loop_max_steps = self.test_steps // (batch_size or 1)
        # TODO: Find where to configure this 'test directory' for the outputs of
        # the Monitor.
        test_dir = "results"
        # TODO: Debug wandb Monitor integration.
        self.test_env = ContinualRLTestEnvironment(
            env_dataloader,
            task_schedule=self.test_task_schedule,
            directory=test_dir,
            step_limit=test_loop_max_steps,
            config=self.config,
            force=True,
            video_callable=None if self.config.render else False,
        )
        return self.test_env

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.
        
        In the case of ContinualRL, fit is only called once, with an environment that
        shifts between all the tasks.
        """
        return 1
    
    @staticmethod
    def _make_env(
        base_env: Union[str, gym.Env, Callable[[], gym.Env]],
        wrappers: List[Callable[[gym.Env], gym.Env]] = None,
        observe_state_directly: bool = False,
    ) -> gym.Env:
        """ Helper function to create a single (non-vectorized) environment. """
        env: gym.Env
        if isinstance(base_env, str):
            if base_env.startswith("MetaMonsterKong") and observe_state_directly:
                env = gym.make(base_env, observe_state=True)
            else:
                env = gym.make(base_env)
        elif isinstance(base_env, gym.Env):
            env = base_env
        elif callable(base_env):
            env = base_env()
        else:
            raise RuntimeError(
                f"base_env should either be a string, a callable, or a gym "
                f"env. (got {base_env})."
            )
        # NOTE: `wrappers` may be None (the default), hence the `or []`.
        for wrapper in wrappers or []:
            env = wrapper(env)
        return env
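
    # For example (a minimal sketch; the wrapper list is illustrative only):
    #
    #     env = ContinualRLSetting._make_env(
    #         "CartPole-v0",
    #         wrappers=[partial(TimeLimit, max_episode_steps=200)],
    #     )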

    def _make_env_dataloader(
        self,
        env_factory: Callable[[], gym.Env],
        batch_size: Optional[int],
        num_workers: Optional[int] = None,
        seed: Optional[int] = None,
        max_steps: Optional[int] = None,
        max_episodes: Optional[int] = None,
    ) -> GymDataLoader:
        """ Helper function for creating a (possibly vectorized) environment.
        
        """
        logger.debug(
            f"batch_size: {batch_size}, num_workers: {num_workers}, seed: {seed}"
        )

        env: Union[gym.Env, gym.vector.VectorEnv]
        if batch_size is None:
            env = env_factory()
        else:
            env = make_batched_env(
                env_factory,
                batch_size=batch_size,
                num_workers=num_workers,
                # TODO: Still debugging shared memory + custom spaces (e.g. Sparse).
                shared_memory=False,
            )

        ## Apply the "post-batch" wrappers:
        # from sequoia.common.gym_wrappers import ConvertToFromTensors
        # TODO: Only the BaselineMethod requires this, we should enable it only
        # from the BaselineMethod, and leave it 'off' by default.
        if self.add_done_to_observations:
            env = AddDoneToObservation(env)
        # # Convert the samples to tensors and move them to the right device.
        # env = ConvertToFromTensors(env)
        # env = ConvertToFromTensors(env, device=self.config.device)
        # Add a wrapper that converts numpy arrays / etc to Observations/Rewards
        # and from Actions objects to numpy arrays.
        env = TypedObjectsWrapper(
            env,
            observations_type=self.Observations,
            rewards_type=self.Rewards,
            actions_type=self.Actions,
        )
        # Create an IterableDataset from the env using the EnvDataset wrapper.
        dataset = EnvDataset(env, max_steps=max_steps, max_episodes=max_episodes,)

        # Create a GymDataLoader for the EnvDataset.
        env_dataloader = GymDataLoader(dataset)

        if batch_size and seed:
            # Seed each environment with its own seed (based on the base seed).
            env.seed([seed + i for i in range(env_dataloader.num_envs)])
        else:
            env.seed(seed)

        return env_dataloader

    def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Get the list of wrappers to add to each training environment.
        
        The result of this method must be pickleable when using
        multiprocessing.
        
        Returns
        -------
        List[Callable[[gym.Env], gym.Env]]
            The wrappers to apply (in order) to each training environment.
        """
        # We add a restriction to prevent users from getting data from
        # previous or future tasks.
        # TODO: This assumes that tasks all have the same length.
        starting_step = self.current_task_id * self.steps_per_task
        max_steps = starting_step + self.steps_per_task - 1
        return self._make_wrappers(
            task_schedule=self.train_task_schedule,
            sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.train_transforms,
            starting_step=starting_step,
            max_steps=max_steps,
            new_random_task_on_reset=self._new_random_task_on_reset,
        )

    def create_valid_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Get the list of wrappers to add to each validation environment.
        
        The result of this method must be pickleable when using
        multiprocessing.

        Returns
        -------
        List[Callable[[gym.Env], gym.Env]]
            The wrappers to apply (in order) to each validation environment.

        TODO: Decide how this 'validation' environment should behave in
        comparison with the train and test environments.
        """
        # We add a restriction to prevent users from getting data from
        # previous or future tasks.
        # TODO: Should the validation environment only be for the current task?
        starting_step = self.current_task_id * self.steps_per_task
        max_steps = starting_step + self.steps_per_task - 1
        return self._make_wrappers(
            task_schedule=self.valid_task_schedule,
            sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.val_transforms,
            starting_step=starting_step,
            max_steps=max_steps,
            new_random_task_on_reset=self._new_random_task_on_reset,
        )

    def create_test_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Get the list of wrappers to add to a single test environment.
        
        The result of this method must be pickleable when using
        multiprocessing.

        Returns
        -------
        List[Callable[[gym.Env], gym.Env]]
            The wrappers to apply (in order) to the single test environment.
        """
        return self._make_wrappers(
            task_schedule=self.test_task_schedule,
            sharp_task_boundaries=self.known_task_boundaries_at_test_time,
            task_labels_available=self.task_labels_at_test_time,
            transforms=self.test_transforms,
            starting_step=0,
            max_steps=self.max_steps,
            new_random_task_on_reset=self._new_random_task_on_reset,
        )

    def load_task_schedule(self, file_path: Path) -> Dict[int, Dict]:
        """ Load a task schedule from the given path. """
        with open(file_path) as f:
            task_schedule = json.load(f)
            # NOTE: json keys are strings: sort them numerically rather than
            # lexicographically (so that "10" comes after "9").
            return {
                int(k): task_schedule[k]
                for k in sorted(task_schedule.keys(), key=int)
            }
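
    # The schedule file is expected to map (stringified) step numbers to dicts
    # of env attributes, e.g. (hypothetical values for a CartPole schedule):
    #
    #     {
    #         "0":    {"gravity": 9.8,  "length": 0.5},
    #         "5000": {"gravity": 12.0, "length": 0.3}
    #     }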

    def _make_wrappers(
        self,
        task_schedule: Dict[int, Dict],
        sharp_task_boundaries: bool,
        task_labels_available: bool,
        transforms: List[Transforms],
        starting_step: int,
        max_steps: int,
        new_random_task_on_reset: bool,
    ) -> List[Callable[[gym.Env], gym.Env]]:
        """ helper function for creating the train/valid/test wrappers. 
        
        These wrappers get applied *before* the batching, if applicable.
        """
        wrappers: List[Callable[[gym.Env], gym.Env]] = []
        # NOTE: When transitions are smooth, there are no "task boundaries".
        assert sharp_task_boundaries == (not self.smooth_task_boundaries)

        # TODO: Add some kind of Wrapper around the dataset to make it
        # semi-supervised?

        if self.max_episode_steps:
            wrappers.append(
                partial(TimeLimit, max_episode_steps=self.max_episode_steps)
            )

        if is_classic_control_env(self.dataset) and not self.observe_state_directly:
            # If we are in a classic control env, and we dont want the state to
            # be fully-observable (i.e. we want pixel observations rather than
            # getting the pole angle, velocity, etc.), then add the
            # PixelObservation wrapper to the list of wrappers.
            wrappers.append(PixelObservationWrapper)
            wrappers.append(ImageObservations)

        if (
            isinstance(self.dataset, str)
            and self.dataset.lower().startswith("metamonsterkong")
            and not self.observe_state_directly
        ):
            # TODO: Do we need the AtariPreprocessing wrapper on MonsterKong?
            # wrappers.append(partial(AtariPreprocessing, frame_skip=1))
            pass
        elif is_atari_env(self.dataset):
            # TODO: Test & Debug this: Adding the Atari preprocessing wrapper.
            # TODO: Figure out the differences (if there are any) between the
            # AtariWrapper from SB3 and the AtariPreprocessing wrapper from gym.
            wrappers.append(AtariWrapper)
            # wrappers.append(AtariPreprocessing)
            wrappers.append(ImageObservations)

        # Apply image transforms if the env will have image-like obs space
        if not self.observe_state_directly:
            # wrappers.append(ImageObservations)
            # Wrapper to apply the image transforms to the env.
            wrappers.append(partial(TransformObservation, f=transforms))

        # Add a wrapper which will add non-stationarity to the environment.
        # The "task" transitions will either be sharp or smooth.
        # In either case, the task ids for each sample are added to the
        # observations, and the dicts containing the task information (i.e. the
        # current values of the env attributes from the task schedule) get added
        # to the 'info' dicts.
        if sharp_task_boundaries:
            assert self.nb_tasks >= 1
            # Add a wrapper that creates sharp tasks.
            cl_wrapper = MultiTaskEnvironment
        else:
            # Add a wrapper that creates smooth tasks.
            cl_wrapper = SmoothTransitions

        wrappers.append(
            partial(
                cl_wrapper,
                noise_std=self.task_noise_std,
                task_schedule=task_schedule,
                add_task_id_to_obs=True,
                add_task_dict_to_info=True,
                starting_step=starting_step,
                new_random_task_on_reset=new_random_task_on_reset,
                max_steps=max_steps,
            )
        )
        # If the task labels aren't available, we then add another wrapper that
        # hides that information (setting both of them to None) and also marks
        # those spaces as `Sparse`.
        if not task_labels_available:
            # NOTE: This sets the task labels to None, rather than removing
            # them entirely.
            # wrappers.append(RemoveTaskLabelsWrapper)
            wrappers.append(HideTaskLabelsWrapper)

        return wrappers

    def _temp_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """ Gets the minimal wrappers needed to figure out the Spaces of the
        train/valid/test environments.
        
        This is called in the 'constructor' (__post_init__) to set the Setting's
        observation/action/reward spaces, so this should depend on as little
        state from `self` as possible, since not all attributes have been
        defined at the time when this is called. 
        """
        return self._make_wrappers(
            task_schedule=self.train_task_schedule,
            sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.train_transforms,
            # These two shouldn't matter really:
            starting_step=0,
            max_steps=self.max_steps,
            new_random_task_on_reset=self._new_random_task_on_reset,
        )
Example #8
class Config(YamlSerializable):
    name: str = "train"
    bob: int = 123
    some_float: float = 1.23

    points: List[Point] = list_field()
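
# A minimal usage sketch, assuming `Point` is itself a serializable dataclass
# with `x`/`y` fields (hypothetical here), and that YamlSerializable provides
# the usual `save`/`load` pair:
#
#     config = Config(name="eval", points=[Point(x=1.0, y=2.0)])
#     config.save("config.yaml")
#     assert Config.load("config.yaml") == config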
Example #9
class ContinualSLSetting(SLSetting, ContinualAssumption):
    """ Continuous, Task-Agnostic, Continual Supervised Learning.
    
    This is *currently* the most "general" Supervised Continual Learning setting in
    Sequoia.

    - Data distribution changes smoothly over time.
    - Smooth transitions between "tasks"
    - No information about task boundaries or task identity (no task IDs)
    - Maximum of one 'epoch' through the environment.
    """
    # Class variables that hold the 'base' observation/action/reward spaces for the
    # available datasets.
    base_observation_spaces: ClassVar[Dict[str, gym.Space]] = base_observation_spaces
    base_action_spaces: ClassVar[Dict[str, gym.Space]] = base_action_spaces
    base_reward_spaces: ClassVar[Dict[str, gym.Space]] = base_reward_spaces

    # NOTE: Commenting out SLSetting.Observations, as it is the same class as
    # Setting.Observations, and we want a consistent method resolution order.
    Observations: ClassVar[Type[Observations]] = Observations
    Actions: ClassVar[Type[Actions]] = Actions
    Rewards: ClassVar[Type[Rewards]] = Rewards

    Environment: ClassVar[Type[SLSetting.Environment]] = ContinualSLEnvironment[
        Observations, Actions, Rewards
    ]

    Results: ClassVar[Type[ContinualSLResults]] = ContinualSLResults

    # Class variable holding a dict of the names and types of all available
    # datasets.
    # TODO: Issue #43: Support other datasets than just classification
    available_datasets: ClassVar[Dict[str, Type[_ContinuumDataset]]] = {
        c.__name__.lower(): c
        for c in [
            CIFARFellowship,
            MNISTFellowship,
            ImageNet100,
            ImageNet1000,
            CIFAR10,
            CIFAR100,
            EMNIST,
            KMNIST,
            MNIST,
            QMNIST,
            FashionMNIST,
            Synbols,
        ]
        # "synbols": Synbols,
        # "synbols_font": partial(Synbols, task="fonts"),
    }
    # A continual dataset to use. (Should be taken from the continuum package).
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # Transformations to use. See the Transforms enum for the available values.
    transforms: List[Transforms] = list_field(
        Transforms.to_tensor,
        # BUG: The input_shape given to the Model doesn't have the right number
        # of channels, even if we 'fixed' them here. However the images are fine
        # after.
        Transforms.three_channels,
        Transforms.channels_first_if_needed,
    )

    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes.
    increment: Union[int, List[int]] = list_field(
        2, type=int, nargs="*", alias="n_classes_per_task"
    )
    # The number of tasks in the scenario.
    # If zero, defaults to the number of classes divided by the increment.
    nb_tasks: int = 0
    # A different task size applied only for the first task.
    # Deactivated if `increment` is a list.
    initial_increment: int = 0
    # An optional custom class order, used for NC.
    class_order: Optional[List[int]] = None
    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes (defaults to the value of
    # `increment`).
    test_increment: Optional[Union[List[int], int]] = None
    # A different task size applied only for the first test task.
    # Deactivated if `test_increment` is a list. Defaults to the
    # value of `initial_increment`.
    test_initial_increment: Optional[int] = None
    # An optional custom class order for testing, used for NC.
    # Defaults to the value of `class_order`.
    test_class_order: Optional[List[int]] = None

    # Whether task boundaries are smooth or not.
    smooth_task_boundaries: bool = flag(True)
    # Whether the context (task) variable is stationary or not.
    stationary_context: bool = flag(False)
    # Whether tasks share the same action space or not.
    # TODO: This will probably be moved into a different assumption.
    shared_action_space: Optional[bool] = None

    # TODO: Need to put num_workers in only one place.
    batch_size: int = field(default=32, cmd=False)
    num_workers: int = field(default=4, cmd=False)

    def __post_init__(self):
        super().__post_init__()
        assert not self.has_setup_fit
        # Test values default to the same as train.
        self.test_increment = self.test_increment or self.increment
        self.test_initial_increment = (
            self.test_initial_increment or self.initial_increment
        )
        self.test_class_order = self.test_class_order or self.class_order

        # TODO: For now we assume a fixed, equal number of classes per task, for
        # sake of simplicity. We could take out this assumption, but it might
        # make things a bit more complicated.
        if isinstance(self.increment, list) and len(self.increment) == 1:
            self.increment = self.increment[0]
        if isinstance(self.test_increment, list) and len(self.test_increment) == 1:
            self.test_increment = self.test_increment[0]
        assert isinstance(self.increment, int)
        assert isinstance(self.test_increment, int)

        if isinstance(self.action_space, spaces.Discrete):
            base_action_space = self.base_action_spaces[self.dataset]
            n_classes = base_action_space.n
            self.class_order = self.class_order or list(range(n_classes))
            if self.nb_tasks:
                self.increment = n_classes // self.nb_tasks

        if not self.nb_tasks:
            base_action_space = self.base_action_spaces[self.dataset]
            if isinstance(base_action_space, spaces.Discrete):
                self.nb_tasks = base_action_space.n // self.increment

        assert self.nb_tasks != 0, self.nb_tasks

        # The 'scenarios' for train and test from continuum. (ClassIncremental for now).
        self.train_cl_loader: Optional[_BaseScenario] = None
        self.test_cl_loader: Optional[_BaseScenario] = None
        self.train_cl_dataset: Optional[_ContinuumDataset] = None
        self.test_cl_dataset: Optional[_ContinuumDataset] = None

        self.train_datasets: List[TaskSet] = []
        self.val_datasets: List[TaskSet] = []
        self.test_datasets: List[TaskSet] = []

        # This will be set by the Experiment, or passed to the `apply` method.
        # TODO: This could be a bit cleaner.
        self.config: Config
        # Default path to which the datasets will be downloaded.
        self.data_dir: Optional[Path] = None

        self.train_env: ContinualSLEnvironment = None  # type: ignore
        self.val_env: ContinualSLEnvironment = None  # type: ignore
        self.test_env: ContinualSLEnvironment = None  # type: ignore

        # BUG: These `has_setup_fit`, `has_setup_test`, `has_prepared_data` properties
        # aren't working correctly: they get set before the call to the function has
        # been executed, making it impossible to check those values from inside those
        # functions.
        self._has_prepared_data = False
        self._has_setup_fit = False
        self._has_setup_test = False

    def apply(
        self, method: Method["ContinualSLSetting"], config: Config = None
    ) -> ContinualSLResults:
        """Apply the given method on this setting to producing some results."""
        # TODO: It still isn't super clear what should be in charge of creating
        # the config, and how to create it, when it isn't passed explicitly.
        self.config = config or self._setup_config(method)
        assert self.config is not None

        method.configure(setting=self)

        # Run the main loop (defined in ContinualAssumption).
        # Basically does the following:
        # 1. Call method.fit(train_env, valid_env)
        # 2. Test the method on test_env.
        # Return the results, as reported by the test environment.
        results: ContinualSLResults = super().main_loop(method)
        logger.info(results.summary())
        method.receive_results(self, results=results)
        return results

    def train_dataloader(
        self, batch_size: int = 32, num_workers: Optional[int] = 4
    ) -> EnvironmentType:
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_fit:
            self.setup("fit")

        if self.train_env:
            self.train_env.close()

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        dataset = self._make_train_dataset()
        # TODO: Add some kind of Wrapper around the dataset to make it
        # semi-supervised?
        env = self.Environment(
            dataset,
            hide_task_labels=(not self.task_labels_at_train_time),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            Observations=self.Observations,
            Actions=self.Actions,
            Rewards=self.Rewards,
            pin_memory=True,
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=False,
            one_epoch_only=(not self.known_task_boundaries_at_train_time),
        )

        if self.config.render:
            # Add a wrapper that calls 'env.render' at each step?
            env = RenderEnvWrapper(env)

        # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
        # already added when creating the datasets and the CL scenario.
        train_specific_transforms = self.additional_transforms(self.train_transforms)
        if train_specific_transforms:
            env = TransformObservation(env, f=train_specific_transforms)

        if self.monitor_training_performance:
            env = MeasureSLPerformanceWrapper(
                env, first_epoch_only=True, wandb_prefix="Train/",
            )

        # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
        # when transforms don't propagate the 'dtype' field.
        env.observation_space.dtype = self.Observations
        self.train_env = env
        return self.train_env
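
    # A rough interaction sketch with the returned "active" dataloader,
    # assuming the `send` API returns the rewards for a batch of actions, as in
    # Sequoia's active dataloaders:
    #
    #     env = setting.train_dataloader(batch_size=32)
    #     for observations in env:
    #         actions = env.action_space.sample()
    #         rewards = env.send(actions)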

    def val_dataloader(
        self, batch_size: int = 32, num_workers: Optional[int] = 4
    ) -> EnvironmentType:
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_fit:
            self.setup("fit")

        if self.val_env:
            self.val_env.close()

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        dataset = self._make_val_dataset()
        # TODO: Add some kind of Wrapper around the dataset to make it
        # semi-supervised?
        # TODO: Change the reward and action spaces to also use objects.
        env = self.Environment(
            dataset,
            hide_task_labels=(not self.task_labels_at_train_time),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            Observations=self.Observations,
            Actions=self.Actions,
            Rewards=self.Rewards,
            pin_memory=True,
            batch_size=batch_size,
            num_workers=num_workers,
            one_epoch_only=(not self.known_task_boundaries_at_train_time),
        )

        # TODO: If wandb is enabled, then add customized Monitor wrapper (with
        # IterableWrapper as an additional subclass). There would then be a lot of
        # overlap between such a Monitor and the current TestEnvironment.
        if self.config.render:
            # Add a wrapper that calls 'env.render' at each step?
            env = RenderEnvWrapper(env)

        # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
        # already added when creating the datasets and the CL scenario.
        val_specific_transforms = self.additional_transforms(self.val_transforms)
        if val_specific_transforms:
            env = TransformObservation(env, f=val_specific_transforms)

        # NOTE: We don't measure online performance on the validation set.
        # if self.monitor_training_performance:
        #     env = MeasureSLPerformanceWrapper(
        #         env,
        #         first_epoch_only=True,
        #         wandb_prefix=f"Train/Task {self.current_task_id}",
        #     )

        # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
        # when transforms don't propagate the 'dtype' field.
        env.observation_space.dtype = self.Observations
        self.val_env = env
        return self.val_env

    def test_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> ContinualSLEnvironment[Observations, Actions, Rewards]:
        """ Returns a Continual SL Test environment.
        """
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_test:
            self.setup("test")

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        dataset = self._make_test_dataset()
        env = self.Environment(
            dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            hide_task_labels=(not self.task_labels_at_test_time),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            Observations=self.Observations,
            Actions=self.Actions,
            Rewards=self.Rewards,
            pretend_to_be_active=True,
            shuffle=False,
            one_epoch_only=True,
        )

        # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
        # already added when creating the datasets and the CL scenario.
        test_specific_transforms = self.additional_transforms(self.test_transforms)
        if test_specific_transforms:
            env = TransformObservation(env, f=test_specific_transforms)

        # FIXME: Instead of trying to create a 'fake' task schedule for the test
        # environment, let the test environment see the task ids (and hide them
        # afterwards if necessary), so that it can compile the stats for each
        # task based on the task IDs of the observations.

        # TODO: Configure the 'monitoring' dir properly.
        if wandb.run:
            test_dir = wandb.run.dir
        else:
            test_dir = "results"

        test_loop_max_steps = len(dataset) // (env.batch_size or 1)
        test_env = ContinualSLTestEnvironment(
            env,
            directory=test_dir,
            step_limit=test_loop_max_steps,
            force=True,
            config=self.config,
            video_callable=None if (wandb.run or self.config.render) else False,
        )

        # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
        # when transforms don't propagate the 'dtype' field.
        env.observation_space.dtype = self.Observations
        if self.test_env:
            self.test_env.close()
        self.test_env = test_env
        return self.test_env

    def prepare_data(self, data_dir: Path = None) -> None:
        # TODO: Pass the transformations to the CL scenario, or to the dataset?
        if data_dir is None:
            if self.config:
                data_dir = self.config.data_dir
            else:
                data_dir = Path("data")

        logger.info(f"Downloading datasets to directory {data_dir}")
        self.train_cl_dataset = self.make_dataset(data_dir, download=True, train=True)
        self.test_cl_dataset = self.make_dataset(data_dir, download=True, train=False)
        return super().prepare_data()

    def setup(self, stage: str = None):
        if not self.has_prepared_data:
            self.prepare_data()
        super().setup(stage=stage)

        if stage not in (None, "fit", "test"):
            raise RuntimeError(f"`stage` should be 'fit', 'test' or None.")

        if stage in (None, "fit"):
            self.train_cl_dataset = self.train_cl_dataset or self.make_dataset(
                self.config.data_dir, download=False, train=True
            )
            self.train_cl_loader = self.train_cl_loader or ClassIncremental(
                cl_dataset=self.train_cl_dataset,
                nb_tasks=self.nb_tasks,
                increment=self.increment,
                initial_increment=self.initial_increment,
                transformations=self.train_transforms,
                class_order=self.class_order,
            )
            if not self.train_datasets and not self.val_datasets:
                for task_id, train_taskset in enumerate(self.train_cl_loader):
                    train_taskset, valid_taskset = split_train_val(
                        train_taskset, val_split=0.1
                    )
                    self.train_datasets.append(train_taskset)
                    self.val_datasets.append(valid_taskset)
                # IDEA: We could do the remapping here instead of adding a wrapper later.
                if self.shared_action_space and isinstance(
                    self.action_space, spaces.Discrete
                ):
                    # If we have a shared output space, then they are all mapped to [0, n_per_task]
                    self.train_datasets = list(map(relabel, self.train_datasets))
                    self.val_datasets = list(map(relabel, self.val_datasets))

        if stage in (None, "test"):
            self.test_cl_dataset = self.test_cl_dataset or self.make_dataset(
                self.config.data_dir, download=False, train=False
            )
            self.test_cl_loader = self.test_cl_loader or ClassIncremental(
                cl_dataset=self.test_cl_dataset,
                nb_tasks=self.nb_tasks,
                increment=self.test_increment,
                initial_increment=self.test_initial_increment,
                transformations=self.test_transforms,
                class_order=self.test_class_order,
            )
            if not self.test_datasets:
                # TODO: If we decide to 'shuffle' the test tasks, then store the sequence of
                # task ids in a new property, probably here.
                # self.test_task_order = list(range(len(self.test_datasets)))
                self.test_datasets = list(self.test_cl_loader)
                # IDEA: We could do the remapping here instead of adding a wrapper later.
                if self.shared_action_space and isinstance(
                    self.action_space, spaces.Discrete
                ):
                    # If we have a shared output space, then they are all mapped to [0, n_per_task]
                    self.test_datasets = list(map(relabel, self.test_datasets))

    def _make_train_dataset(self) -> Union[TaskSet, Dataset]:
        # NOTE: Passing the same seed to `train`/`valid`/`test` is fine, because it's
        # only used for the shuffling used to make the task boundaries smooth.
        if self.smooth_task_boundaries:
            return smooth_task_boundaries_concat(
                self.train_datasets, seed=self.config.seed if self.config else None
            )
        if self.stationary_context:
            joined_dataset = concat(self.train_datasets)
            return shuffle(joined_dataset, seed=self.config.seed)
        if self.known_task_boundaries_at_train_time:
            return self.train_datasets[self.current_task_id]
        else:
            return concat(self.train_datasets)

    def _make_val_dataset(self) -> Dataset:
        if self.smooth_task_boundaries:
            return smooth_task_boundaries_concat(
                self.val_datasets, seed=self.config.seed
            )
        if self.stationary_context:
            joined_dataset = concat(self.val_datasets)
            return shuffle(joined_dataset, seed=self.config.seed)
        if self.known_task_boundaries_at_train_time:
            return self.val_datasets[self.current_task_id]
        return concat(self.val_datasets)

    def _make_test_dataset(self) -> Dataset:
        if self.smooth_task_boundaries:
            return smooth_task_boundaries_concat(
                self.test_datasets, seed=self.config.seed
            )
        else:
            return concat(self.test_datasets)

    def make_dataset(
        self, data_dir: Path, download: bool = True, train: bool = True, **kwargs
    ) -> _ContinuumDataset:
        # TODO: #7 Use this method here to fix the errors that happen when
        # trying to create every single dataset from continuum.
        data_dir = Path(data_dir)

        if not data_dir.exists():
            data_dir.mkdir(parents=True, exist_ok=True)

        if self.dataset in self.available_datasets:
            dataset_class = self.available_datasets[self.dataset]
            return dataset_class(
                data_path=data_dir, download=download, train=train, **kwargs
            )

        elif self.dataset in self.available_datasets.values():
            dataset_class = self.dataset
            return dataset_class(
                data_path=data_dir, download=download, train=train, **kwargs
            )

        elif isinstance(self.dataset, Dataset):
            logger.info(f"Using a custom dataset {self.dataset}")
            return self.dataset

        else:
            raise NotImplementedError

    @property
    def observation_space(self) -> TypedDictSpace[Observations]:
        """ The un-batched observation space, based on the choice of dataset and
        the transforms at `self.transforms` (which apply to the train/valid/test
        environments).

        The returned space is a TypedDictSpace, with the following properties:
        - `x`: observation space (e.g. `Image` space)
        - `task_labels`: Union[Discrete, Sparse[Discrete]]
           The task labels for each sample. When task labels are not available,
           the task labels space is Sparse, and entries will be `None`.
           
        TODO: Replace this property's type with a `Space[Observations]` (and also create
        this `Space` generic)
        """
        x_space = self.base_observation_spaces[self.dataset]
        if not self.transforms:
            # NOTE: When we don't pass any transforms, continuum scenarios still
            # at least use 'to_tensor'.
            x_space = Transforms.to_tensor(x_space)

        # apply the transforms to the observation space.
        for transform in self.transforms:
            x_space = transform(x_space)
        x_space = add_tensor_support(x_space)

        task_label_space = spaces.Discrete(self.nb_tasks)
        if not self.task_labels_at_train_time:
            task_label_space = Sparse(task_label_space, 1.0)
        task_label_space = add_tensor_support(task_label_space)
 
        return TypedDictSpace(
            x=x_space, task_labels=task_label_space, dtype=self.Observations,
        )
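
    # For example (sketch; exact shapes depend on the dataset and transforms):
    #
    #     space = setting.observation_space
    #     obs = space.sample()
    #     # -> Observations(x=<image Tensor>, task_labels=<task id, or None
    #     #    when task labels aren't available>)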

    # TODO: Add a `train_observation_space`, `train_action_space`, `train_reward_space`?

    @property
    def action_space(self) -> spaces.Discrete:
        """ Action space for this setting. """
        base_action_space = self.base_action_spaces[self.dataset]
        if isinstance(base_action_space, spaces.Discrete):
            if self.shared_action_space:
                assert isinstance(self.increment, int), (
                    "Need to have same number of classes in each task when "
                    "`shared_action_space` is true."
                )
                return spaces.Discrete(self.increment)
        return base_action_space

        # TODO: IDEA: Have the action space only reflect the number of 'current' classes
        # in order to create a "true" class-incremental learning setting.
        # n_classes_seen_so_far = 0
        # for task_id in range(self.current_task_id):
        #     n_classes_seen_so_far += self.num_classes_in_task(task_id)
        # return spaces.Discrete(n_classes_seen_so_far)

    @property
    def reward_space(self) -> spaces.Discrete:
        # NOTE: Look up the *reward* spaces here (the original read from
        # `base_action_spaces`, which appears to be a copy-paste slip).
        base_reward_space = self.base_reward_spaces[self.dataset]
        if isinstance(base_reward_space, spaces.Discrete):
            if self.shared_action_space:
                assert isinstance(self.increment, int), (
                    "Need to have same number of classes in each task when "
                    "`shared_action_space` is true."
                )
                return spaces.Discrete(self.increment)
        return base_reward_space

    def additional_transforms(self, stage_transforms: List[Transforms]) -> Compose:
        """ Returns the transforms in `stage_transforms` that are additional transforms
        from those in `self.transforms`.

        For example, if:
        ```
        setting.transforms = Compose([Transforms.Resize(32), Transforms.ToTensor])
        setting.train_transforms = Compose([Transforms.Resize(32), Transforms.ToTensor, Transforms.RandomGrayscale])
        ```
        Then:
        ```
        setting.additional_transforms(setting.train_transforms)
        # will give:
        Compose([Transforms.RandomGrayscale])
        ```
        """
        reference_transforms = self.transforms

        if len(stage_transforms) < len(reference_transforms):
            # Assume no overlap, return all the 'stage' transforms.
            return Compose(stage_transforms)
        if stage_transforms == reference_transforms:
            # Complete overlap, return an empty list.
            return Compose([])

        # Only keep the transforms that come after the shared prefix with the
        # 'base' transforms: as soon as one differs, stop comparing.
        i = 0
        for i, (t_a, t_b) in enumerate(zip(stage_transforms, reference_transforms)):
            if t_a != t_b:
                break
        else:
            # No mismatch found: the extra transforms start right after the
            # common prefix.
            i = len(reference_transforms)
        return Compose(stage_transforms[i:])
Example #10
class WandbConfig(Serializable):
    """ Set of configurations options for calling wandb.init directly. """

    # Which wandb user (entity) to use.
    entity: str = ""

    # project name to use in wandb.
    project: str = ""

    # Name used to easily group runs together: a unique string shared by all
    # runs in a given group. Used to create a parent folder that will contain
    # the `run_name` directory.
    group: Optional[str] = None
    # Wandb run name. If None, will use wandb's automatic name generation
    run_name: Optional[str] = None

    # Identifier unique to each individual wandb run. When given, will try to
    # resume the corresponding run; when None, wandb generates a new ID each time.
    run_id: Optional[str] = None

    # A run number used to differentiate iterations of the same experiment.
    # Runs with the same name can later be grouped in wandb to produce plots
    # with standard-error bars.
    # TODO: Could maybe use the run_id instead?
    run_number: Optional[int] = None

    # Path where the wandb files should be stored. If the 'WANDB_DIR'
    # environment variable is set, uses that value. Otherwise, defaults to
    # the value of "<log_dir_root>/wandb"
    wandb_path: Optional[Path] = Path(
        os.environ["WANDB_DIR"]
    ) if "WANDB_DIR" in os.environ else None

    # Tags to add to this run with wandb.
    tags: List[str] = list_field()

    # Notes about this particular experiment. (will be logged to wandb if used.)
    notes: Optional[str] = None

    # Root Logging directory.
    log_dir_root: Path = Path("results")

    monitor_gym: bool = True

    # Wandb api key. Useful for preventing wandb's login prompt from appearing
    # when running on clusters or docker-based setups where the environment
    # variables aren't always shared.
    wandb_api_key: Optional[Union[str, Path]] = field(
        default=os.environ.get("WANDB_API_KEY"),
        to_dict=False,  # Do not serialize this field.
        repr=False,  # Do not show this field in repr().
    )

    # Run offline (data can be streamed later to wandb servers).
    offline: bool = False
    # Enables or explicitly disables anonymous logging.
    anonymous: bool = False
    # Sets the version, mainly used to resume a previous run.
    version: Optional[str] = None

    # Save checkpoints in wandb dir to upload on W&B servers.
    log_model: bool = False

    # Class variables used to check whether wandb.login has already been called.
    logged_in: ClassVar[bool] = False
    key_configured: ClassVar[bool] = False

    @property
    def log_dir(self):
        return self.log_dir_root.joinpath(
            (self.project or ""),
            (self.group or ""),
            (self.run_name or "default"),
            (f"run_{self.run_number}" if self.run_number is not None else ""),
        )

    def wandb_login(self) -> bool:
        """Calls `wandb.login()`.

        Returns
        -------
        bool
            If the key is configured.
        """
        key = None
        if self.wandb_api_key is not None and self.project:
            if Path(self.wandb_api_key).is_file():
                key = Path(self.wandb_api_key).read_text()
            else:
                key = str(self.wandb_api_key)
            assert isinstance(key, str)

        cls = type(self)
        if not cls.logged_in:
            cls.key_configured = wandb.login(key=key)
            cls.logged_in = True
        return cls.key_configured

    def wandb_init_kwargs(self) -> Dict:
        """ Return the kwargs to pass to wandb.init() """
        if self.run_name is None:
            # TODO: Create a run name using the coefficients of the tasks, etc?
            # At the moment, if no run name is given, the 'random' name from wandb is used.
            pass
        if self.wandb_path is None:
            self.wandb_path = self.log_dir_root / "wandb"
        self.wandb_path.mkdir(parents=True, mode=0o777, exist_ok=True)
        return dict(
            dir=str(self.wandb_path),
            project=self.project,
            entity=self.entity,
            name=self.run_name,
            id=self.run_id,
            group=self.group,
            notes=self.notes,
            reinit=True,
            tags=self.tags,
            resume="allow",
            monitor_gym=self.monitor_gym,
        )

    def wandb_init(self, config_dict: Dict = None) -> wandb.wandb_run.Run:
        """Executes the call to `wandb.init()`.

        TODO(@lebrice): Not sure if it still makes sense to call `wandb.init`
        ourselves when using Pytorch Lightning, should probably ask @jeromepl
        for advice on this.

        Args:
            config_dict (Dict): The configuration dictionary. Usually obtained
            by calling `to_dict()` on a `Serializable` dataclass, or `asdict()`
            on a regular dataclass.

        Returns:
            wandb.wandb_run.Run: Whatever gets returned by `wandb.init()`.
        """

        logger.info(f"Wandb run id: {self.run_id}")
        logger.info(
            f"Using wandb. Group name: {self.group} run name: {self.run_name}, "
            f"log_dir: {self.log_dir}"
        )
        self.wandb_login()

        init_kwargs = self.wandb_init_kwargs()
        init_kwargs["config"] = config_dict

        run = wandb.init(
            **init_kwargs
        )
        logger.info(f"Run: {run}")
        if run:
            if self.run_name is None:
                self.run_name = run.name
            # run.save()
            if run.resumed:
                # TODO: Add *proper* wandb resuming, probably by using @nitarshan's cool md5-id idea.
                # wandb.restore(self.log_dir / "checkpoints")
                pass
        return run

    def make_logger(self, wandb_parent_dir: Path = None) -> WandbLogger:
        logger.info(f"Creating a WandbLogger with using options {self}.")
        self.wandb_login()
        wandb_logger = WandbLogger(
            name=self.run_name,
            save_dir=str(wandb_parent_dir) if wandb_parent_dir else None,
            offline=self.offline,
            id=self.run_id,
            anonymous=self.anonymous,
            version=self.version,
            project=self.project,
            tags=self.tags,
            log_model=self.log_model,
            entity=self.entity,
            group=self.group,
            monitor_gym=self.monitor_gym,
            reinit=True,
        )
        return wandb_logger
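
    # Typical usage sketch (project/entity values are placeholders):
    #
    #     wandb_config = WandbConfig(project="my_project", entity="my_team")
    #     run = wandb_config.wandb_init(config_dict={"lr": 1e-3})
    #     wandb.log({"loss": 0.1})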
Example #11
class IncrementalRLSetting(IncrementalAssumption,
                           DiscreteTaskAgnosticRLSetting):
    """ Continual RL setting in which:
    - Changes in the environment's context occur suddenly (same as in Discrete, Task-Agnostic RL)
    - Task boundary information (and task labels) are given at training time
    - Task boundary information is given at test time, but task identity is not.
    """

    Observations: ClassVar[Type[Observations]] = Observations
    Actions: ClassVar[Type[Actions]] = Actions
    Rewards: ClassVar[Type[Rewards]] = Rewards

    # The function used to create the tasks for the chosen env.
    _task_sampling_function: ClassVar[Callable[
        ..., IncrementalTask]] = make_incremental_task
    Results: ClassVar[Type[Results]] = IncrementalRLResults

    # Class variable that holds the dict of available environments.
    available_datasets: ClassVar[Dict[str, str]] = available_datasets
    # Which dataset/environment to use for training, validation and testing.
    dataset: str = choice(available_datasets, default="CartPole-v0")

    # # The number of tasks. By default 0, which means that it will be set
    # # depending on other fields in __post_init__, or eventually be just 1.
    # nb_tasks: int = field(0, alias=["n_tasks", "num_tasks"])

    # (Copied from the assumption, just for clarity:)
    # TODO: Shouldn't these kinds of properties be on the class, rather than on the
    # instance?

    # Whether the task boundaries are smooth or sudden.
    smooth_task_boundaries: Final[bool] = constant(False)
    # Whether to give access to the task labels at train time.
    task_labels_at_train_time: Final[bool] = constant(True)
    # Whether to give access to the task labels at test time.
    task_labels_at_test_time: bool = False

    train_envs: List[Union[str, Callable[[], gym.Env]]] = list_field()
    val_envs: List[Union[str, Callable[[], gym.Env]]] = list_field()
    test_envs: List[Union[str, Callable[[], gym.Env]]] = list_field()

    def __post_init__(self):
        if not self.nb_tasks:
            # TODO: In case of the metaworld envs, we could determine the 'default' nb
            # of tasks to use based on the number of available tasks
            pass

        if self.dataset == "MT10":
            from metaworld import MT10, Task, MetaWorldEnv

            self._benchmark = MT10(
                seed=self.config.seed if self.config else None)
            envs: Dict[str, Type[MetaWorldEnv]] = self._benchmark.train_classes
            env_tasks: Dict[str, List[Task]] = {
                env_name: [
                    task for task in self._benchmark.train_tasks
                    if task.env_name == env_name
                ]
                for env_name, env_class in
                self._benchmark.train_classes.items()
            }
            from itertools import islice
            train_env_tasks: Dict[str, List[Task]] = {}
            val_env_tasks: Dict[str, List[Task]] = {}
            test_env_tasks: Dict[str, List[Task]] = {}
            test_fraction = 0.1
            val_fraction = 0.1
            # NOTE: Use a distinct loop variable (`tasks`), to avoid shadowing
            # the `env_tasks` dict while iterating over it.
            for env_name, tasks in env_tasks.items():
                n_tasks = len(tasks)
                n_val_tasks = int(max(1, n_tasks * val_fraction))
                n_test_tasks = int(max(1, n_tasks * test_fraction))
                n_train_tasks = n_tasks - n_val_tasks - n_test_tasks
                if n_train_tasks <= 1:
                    # Can't create train, val and test tasks.
                    raise RuntimeError(
                        f"There aren't enough tasks for env {env_name} ({n_tasks})."
                    )
                tasks_iterator = iter(tasks)
                train_env_tasks[env_name] = list(
                    islice(tasks_iterator, n_train_tasks))
                val_env_tasks[env_name] = list(
                    islice(tasks_iterator, n_val_tasks))
                test_env_tasks[env_name] = list(
                    islice(tasks_iterator, n_test_tasks))
                assert train_env_tasks[env_name]
                assert val_env_tasks[env_name]
                assert test_env_tasks[env_name]

            from ..discrete.multienv_wrappers import RandomMultiEnvWrapper
            # TODO: Fix the naming of this MultiTaskEnvironment wrapper:
            from sequoia.common.gym_wrappers import MultiTaskEnvironment
            # NOTE: We could do some shuffling here, for instance.
            train_env_names, train_env_task_lists = zip(
                *train_env_tasks.items())
            val_env_names, val_env_task_lists = zip(*val_env_tasks.items())
            test_env_names, test_env_task_lists = zip(
                *test_env_tasks.items())

            self.train_envs = [
                partial(make_metaworld_env,
                        env_class=envs[env_name],
                        tasks=tasks)
                for env_name, tasks in train_env_tasks.items()
            ]
            self.val_envs = [
                partial(make_metaworld_env,
                        env_class=envs[env_name],
                        tasks=tasks)
                for env_name, tasks in val_env_tasks.items()
            ]
            self.test_envs = [
                partial(make_metaworld_env,
                        env_class=envs[env_name],
                        tasks=tasks)
                for env_name, tasks in test_env_tasks.items()
            ]

        # if is_monsterkong_env(self.dataset):
        #     if self.force_pixel_observations:
        #         # Add this to the kwargs that will be passed to gym.make, to make sure that
        #         # we observe pixels, and not state.
        #         self.base_env_kwargs["observe_state"] = False
        #     elif self.force_state_observations:
        #         self.base_env_kwargs["observe_state"] = True

        self._using_custom_envs_foreach_task: bool = False
        if self.train_envs:
            self._using_custom_envs_foreach_task = True
            self.nb_tasks = len(self.train_envs)
            # TODO: Not sure what to do with the `self.dataset` field here:
            # ContinualRLSetting expects a single env, while we have more than
            # one, and its __post_init__ creates the rest of the fields based
            # on `self.dataset`.
            self.dataset = self.train_envs[0]

            if not self.val_envs:
                # TODO: Use a wrapper that sets a different random seed
                self.val_envs = self.train_envs.copy()
            if not self.test_envs:
                # TODO: Use a wrapper that sets a different random seed
                self.test_envs = self.train_envs.copy()
            if (self.train_task_schedule or self.val_task_schedule
                    or self.test_task_schedule):
                raise RuntimeError(
                    "You can either pass `train/valid/test_envs`, or a task schedule, "
                    "but not both!")
        else:
            if self.val_envs or self.test_envs:
                raise RuntimeError(
                    "Can't pass `val_envs` or `test_envs` without passing `train_envs`."
                )

        super().__post_init__()

        if self._using_custom_envs_foreach_task:
            # TODO: Use 'no-op' task schedules for now.
            # self.train_task_schedule.clear()
            # self.val_task_schedule.clear()
            # self.test_task_schedule.clear()
            pass

            # TODO: Check that all the envs have the same observation spaces!
            # (If possible, find a way to check this without having to instantiate all
            # the envs.)

        # TODO: If the dataset has a `max_path_length` attribute, then it's probably
        # a Mujoco / metaworld / etc env, and so we set a limit on the episode length to
        # avoid getting an error.
        max_path_length: Optional[int] = getattr(self._temp_train_env,
                                                 "max_path_length", None)
        if self.max_episode_steps is None and max_path_length is not None:
            assert max_path_length > 0
            self.max_episode_steps = max_path_length

        # if self.dataset == "MetaMonsterKong-v0":
        #     # TODO: Limit the episode length in monsterkong?
        #     # TODO: Actually end episodes when reaching a task boundary, to force the
        #     # level to change?
        #     self.max_episode_steps = self.max_episode_steps or 500

        # FIXME: Really annoying little bugs with these three arguments!
        # self.nb_tasks = self.max_steps // self.steps_per_task

    @property
    def train_task_lengths(self) -> List[int]:
        """ Gives the length of each training task (in steps for now). """
        return [
            task_b_step - task_a_step for task_a_step, task_b_step in pairwise(
                sorted(self.train_task_schedule.keys()))
        ]

    @property
    def train_phase_lengths(self) -> List[int]:
        """ Gives the length of each training 'phase', i.e. the maximum number
        of steps that can be taken in the training environment in a single
        call to `.fit`. (Currently identical to `train_task_lengths`.)
        """
        return self.train_task_lengths

    @property
    def current_train_task_length(self) -> int:
        """ Deprecated field, gives back the max number of steps per task. """
        if self.stationary_context:
            return sum(self.train_task_lengths)
        return self.train_task_lengths[self.current_task_id]

    # steps_per_task: int = deprecated_property(
    #     "steps_per_task", "current_train_task_length"
    # )
    # @property
    # def steps_per_task(self) -> int:
    #     # unique_task_lengths = list(set(self.train_task_lengths))
    #     warning = DeprecationWarning(
    #         "The 'steps_per_task' attribute is deprecated, use "
    #         "`current_train_task_length` instead, which gives the length of the "
    #         "current task."
    #     )
    #     warnings.warn(warning)
    #     logger.warning(warning)
    #     return self.current_train_task_length

    # @property
    # def steps_per_phase(self) -> int:
    #     # return unique_task_lengths
    #     test_task_lengths: List[int] = [
    #         task_b_step - task_a_step
    #         for task_a_step, task_b_step in pairwise(
    #             sorted(self.test_task_schedule.keys())
    #         )
    #     ]

    @property
    def task_label_space(self) -> gym.Space:
        # TODO: Explore an alternative design for the task sampling, based more around
        # gym spaces rather than the generic function approach that's currently used?
        # IDEA: Might be cleaner to put this in the assumption class
        task_label_space = spaces.Discrete(self.nb_tasks)
        if not self.task_labels_at_train_time or not self.task_labels_at_test_time:
            sparsity = 1
            if self.task_labels_at_train_time ^ self.task_labels_at_test_time:
                # We have task labels "50%" of the time, ish:
                sparsity = 0.5
            task_label_space = Sparse(task_label_space, sparsity=sparsity)
        return task_label_space
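
    # e.g. (illustration): with 5 tasks and task labels available only at
    # train time (the defaults above), `task_label_space` is
    # `Sparse(Discrete(5), sparsity=0.5)`: presumably, about half of the
    # sampled task labels come back as `None`.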

    def setup(self, stage: str = None) -> None:
        # Called before the start of each task during training, validation and
        # testing.
        super().setup(stage=stage)
        # What's done in ContinualRLSetting:
        # if stage in {"fit", None}:
        #     self.train_wrappers = self.create_train_wrappers()
        #     self.valid_wrappers = self.create_valid_wrappers()
        # elif stage in {"test", None}:
        #     self.test_wrappers = self.create_test_wrappers()
        if self._using_custom_envs_foreach_task:
            logger.debug(
                f"Using custom environments from `self.[train/val/test]_envs` for task "
                f"{self.current_task_id}.")
            # NOTE: Here is how this supports passing custom envs for each task: We just
            # switch out the value of this property, and let the
            # `train/val/test_dataloader` methods work as usual!
            self.dataset = self.train_envs[self.current_task_id]
            self.val_dataset = self.val_envs[self.current_task_id]
            # TODO: The test loop goes through all the envs, hence this doesn't really
            # work.
            self.test_dataset = self.test_envs[self.current_task_id]

            # TODO: Check that the observation/action spaces are all the same for all
            # the train/valid/test envs
            self._check_all_envs_have_same_spaces(
                envs_or_env_functions=self.train_envs,
                wrappers=self.train_wrappers,
            )
            # TODO: Inconsistent naming between `val_envs` and `valid_wrappers` etc.
            self._check_all_envs_have_same_spaces(
                envs_or_env_functions=self.val_envs,
                wrappers=self.valid_wrappers,
            )
            self._check_all_envs_have_same_spaces(
                envs_or_env_functions=self.test_envs,
                wrappers=self.test_wrappers,
            )
        else:
            # TODO: Should we populate the `self.train_envs`, `self.val_envs` and
            # `self.test_envs` fields here as well, just to be consistent?
            # base_env = self.dataset
            # def task_env(task_index: int) -> Callable[[], MultiTaskEnvironment]:
            #     return self._make_env(
            #         base_env=base_env,
            #         wrappers=[],
            #     )
            # self.train_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)]
            # self.val_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)]
            # self.test_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)]
            # assert False, self.train_task_schedule
            pass

    # def _setup_fields_using_temp_env(self, temp_env: MultiTaskEnvironment):
    #     """ Setup some of the fields on the Setting using a temporary environment.

    #     This temporary environment only lives during the __post_init__() call.
    #     """
    #     super()._setup_fields_using_temp_env(temp_env)
    def test_dataloader(self,
                        batch_size: Optional[int] = None,
                        num_workers: Optional[int] = None):
        if not self._using_custom_envs_foreach_task:
            return super().test_dataloader(batch_size=batch_size,
                                           num_workers=num_workers)

        # raise NotImplementedError("TODO:")

        # IDEA: Pretty hacky, but might be cleaner than adding fields for the moment.
        test_max_steps = self.test_max_steps
        test_max_episodes = self.test_max_episodes
        self.test_max_steps = test_max_steps // self.nb_tasks
        if self.test_max_episodes:
            self.test_max_episodes = test_max_episodes // self.nb_tasks
        # self.test_env = self.TestEnvironment(self.test_envs[self.current_task_id])

        task_test_env = super().test_dataloader(batch_size=batch_size,
                                                num_workers=num_workers)

        self.test_max_steps = test_max_steps
        self.test_max_episodes = test_max_episodes
        return task_test_env

    def test_loop(self, method: Method["IncrementalRLSetting"]):
        if not self._using_custom_envs_foreach_task:
            return super().test_loop(method)

        # TODO: If we're using custom envs for each task, then the test loop needs to be
        # re-organized.
        # raise NotImplementedError(
        #     f"TODO: Need to add a wrapper that can switch between envs, or "
        #     f"re-write the test loop."
        # )
        assert self.nb_tasks == len(self.test_envs), "assuming this for now."
        test_envs = []
        for task_id in range(self.nb_tasks):
            # TODO: Make sure that self.test_dataloader() uses the right number
            # of steps per test task (currently hard-set to self.test_max_steps).
            task_test_env = self.test_dataloader()
            test_envs.append(task_test_env)
        from ..discrete.multienv_wrappers import ConcatEnvsWrapper

        on_task_switch_callback = getattr(method, "on_task_switch", None)
        joined_test_env = ConcatEnvsWrapper(
            test_envs,
            add_task_ids=self.task_labels_at_test_time,
            on_task_switch_callback=on_task_switch_callback,
        )
        # TODO: Use this 'joined' test environment in this test loop somehow.
        # IDEA: Hacky way to do it: (I don't think this will work as-is though)
        _test_dataloader_method = self.test_dataloader
        self.test_dataloader = lambda *args, **kwargs: joined_test_env
        super().test_loop(method)
        self.test_dataloader = _test_dataloader_method

        test_loop_results = DiscreteTaskAgnosticRLSetting.Results()
        for task_id, test_env in enumerate(test_envs):
            # TODO: The results are still of the wrong type, because we aren't changing
            # the type of test environment or the type of Results
            results_of_wrong_type: IncrementalRLResults = test_env.get_results(
            )
            # For now, this odd setup means that only one of the 'result'
            # objects in here will actually contain metrics:
            # assert results_of_wrong_type.task_results[task_id].metrics
            all_metrics: List[EpisodeMetrics] = sum([
                result.metrics for result in results_of_wrong_type.task_results
            ], [])
            n_metrics_in_each_result = [
                len(result.metrics)
                for result in results_of_wrong_type.task_results
            ]
            # assert all(n_metrics == 0 for i, n_metrics in enumerate(n_metrics_in_each_result) if i != task_id), (n_metrics_in_each_result, task_id)
            # TODO: Also transfer the other properties, like runtime, online
            # performance, etc?
            # TODO: Maybe implement addition for these?
            # task_result = sum(results_of_wrong_type.task_results)
            task_result = TaskResults(metrics=all_metrics)
            # task_result: TaskResults[EpisodeMetrics] = results_of_wrong_type.task_results[task_id]
            test_loop_results.task_results.append(task_result)
        return test_loop_results

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        In this Incremental-RL Setting, fit is called once per task.
        (Same as ClassIncrementalSetting in SL).
        """
        return self.nb_tasks

    @staticmethod
    def _make_env(
        base_env: Union[str, gym.Env, Callable[[], gym.Env]],
        wrappers: List[Callable[[gym.Env], gym.Env]] = None,
        **base_env_kwargs: Dict,
    ) -> gym.Env:
        """ Helper function to create a single (non-vectorized) environment.

        This is also used to create the env whenever `self.dataset` is a string that
        isn't registered in gym. This happens for example when using an environment from
        meta-world (or mtenv).
        """
        # Check if the env is registered in a known 'third party' gym-like
        # package, and if needed, create the base env in the way that package
        # requires.
        if isinstance(base_env, str):
            env_id = base_env

            # Check if the id belongs to mtenv
            if MTENV_INSTALLED and env_id in mtenv_envs:
                from mtenv import make as mtenv_make

                # NOTE: This is quite odd; it's unclear why mtenv's `make`
                # API works this way.
                base_env = mtenv_make(env_id, **base_env_kwargs)

                # Add a wrapper that will remove the task information, because we use
                # the same MultiTaskEnv wrapper for all the environments.
                wrappers.insert(0, MTEnvAdapterWrapper)

            if METAWORLD_INSTALLED and env_id in metaworld_envs:
                # TODO: Should we use a particular benchmark here?
                # For now, we find the first benchmark that has an env with this name.
                import metaworld

                for benchmark_class in [metaworld.ML10]:
                    benchmark = benchmark_class()
                    if env_id in benchmark.train_classes.keys():
                        # TODO: We can either let the base_env be an env type, or
                        # actually instantiate it.
                        base_env: Type[MetaWorldEnv] = benchmark.train_classes[
                            env_id]
                        # NOTE: (@lebrice) Here I believe it's better to just have the
                        # constructor, that way we re-create the env for each task.
                        # I think this might be better, as I don't know for sure that
                        # the `set_task` can be called more than once in metaworld.
                        # base_env = base_env_type()
                        break
                else:
                    raise NotImplementedError(
                        f"Can't find a metaworld benchmark that uses env {env_id}"
                    )

        return ContinualRLSetting._make_env(
            base_env=base_env,
            wrappers=wrappers,
            **base_env_kwargs,
        )

    def create_task_schedule(self, temp_env: gym.Env,
                             change_steps: List[int]) -> Dict[int, Dict]:
        task_schedule: Dict[int, Dict] = {}
        if self._using_custom_envs_foreach_task:
            # If custom envs were passed for each task, then we don't create a
            # real "task schedule": the only reason to use one is to change
            # something about a single 'base' env in order to get multiple
            # tasks. We still build an (empty) schedule dict, just to fit the
            # expected interface.
            for task_step in change_steps:
                task_schedule[task_step] = {}
            return task_schedule

        # TODO: Make it possible to use something other than steps as keys in the task
        # schedule, something like a NamedTuple[int, DeltaType], e.g. Episodes(10) or
        # Steps(10), something like that!
        # IDEA: Even fancier, we could use a TimeDelta to say "do one hour of task 0"!!
        for step in change_steps:
            # TODO: Add a `stage` argument (an enum or something with 'train', 'valid'
            # 'test' as values, and pass it to this function. Tasks should be the same
            # in train/valid for now, given the same task Id.
            # TODO: When the Results become able to handle a different ordering of tasks
            # at train vs test time, allow the test task schedule to have different
            # ordering than train / valid.
            task = type(self)._task_sampling_function(
                temp_env,
                step=step,
                change_steps=change_steps,
                seed=self.config.seed if self.config else None,
            )
            task_schedule[step] = task

        return task_schedule

    def create_train_wrappers(self):
        # TODO: Clean this up a bit?
        if self._using_custom_envs_foreach_task:
            # TODO: Maybe do something different here, since we don't actually want to
            # add a CL wrapper at all in this case?
            assert not any(self.train_task_schedule.values())
            base_env = self.train_envs[self.current_task_id]
        else:
            base_env = self.train_dataset
        # assert False, super().create_train_wrappers()
        if self.stationary_context:
            task_schedule_slice = self.train_task_schedule.copy()
            assert len(task_schedule_slice) >= 2
            # Need to pop the last task, so that we don't sample it by accident!
            max_step = max(task_schedule_slice)
            last_task = task_schedule_slice.pop(max_step)
            # TODO: Shift the second-to-last task to the last step
            last_boundary = max(task_schedule_slice)
            second_to_last_task = task_schedule_slice.pop(last_boundary)
            task_schedule_slice[max_step] = second_to_last_task
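            # e.g. (illustration): {0: A, 100: B, 200: C} becomes {0: A, 200: B};
            # the popped last entry only marked the end boundary and must not
            # be sampled as a task.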
            if 0 not in task_schedule_slice:
                assert self.nb_tasks == 1
                task_schedule_slice[0] = second_to_last_task
            # assert False, (max_step, last_boundary, last_task, second_to_last_task)
        else:
            current_task = list(
                self.train_task_schedule.values())[self.current_task_id]
            task_length = self.train_max_steps // self.nb_tasks
            task_schedule_slice = {
                0: current_task,
                task_length: current_task,
            }
        return self._make_wrappers(
            base_env=base_env,
            task_schedule=task_schedule_slice,
            # TODO: Removing this, but we have to check that it doesn't change when/how
            # the task boundaries are given to the Method.
            # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.transforms + self.train_transforms,
            starting_step=0,
            max_steps=max(task_schedule_slice.keys()),
            new_random_task_on_reset=self.stationary_context,
        )

    def create_valid_wrappers(self):
        if self._using_custom_envs_foreach_task:
            # TODO: Maybe do something different here, since we don't actually want to
            # add a CL wrapper at all in this case?
            assert not any(self.val_task_schedule.values())
            base_env = self.val_envs[self.current_task_id]
        else:
            base_env = self.val_dataset
        # assert False, super().create_train_wrappers()
        if self.stationary_context:
            task_schedule_slice = self.val_task_schedule
        else:
            current_task = list(
                self.val_task_schedule.values())[self.current_task_id]
            task_length = self.train_max_steps // self.nb_tasks
            task_schedule_slice = {
                0: current_task,
                task_length: current_task,
            }
        return self._make_wrappers(
            base_env=base_env,
            task_schedule=task_schedule_slice,
            # TODO: Removing this, but we have to check that it doesn't change when/how
            # the task boundaries are given to the Method.
            # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.transforms + self.val_transforms,
            starting_step=0,
            max_steps=max(task_schedule_slice.keys()),
            new_random_task_on_reset=self.stationary_context,
        )

    def create_test_wrappers(self):
        if self._using_custom_envs_foreach_task:
            # TODO: Maybe do something different here, since we don't actually want to
            # add a CL wrapper at all in this case?
            assert not any(self.test_task_schedule.values())
            base_env = self.test_envs[self.current_task_id]
        else:
            base_env = self.test_dataset
        # assert False, super().create_train_wrappers()
        task_schedule_slice = self.test_task_schedule
        # if self.stationary_context:
        # else:
        #     current_task = list(self.test_task_schedule.values())[self.current_task_id]
        #     task_length = self.test_max_steps // self.nb_tasks
        #     task_schedule_slice = {
        #         0: current_task,
        #         task_length: current_task,
        #     }
        return self._make_wrappers(
            base_env=base_env,
            task_schedule=task_schedule_slice,
            # TODO: Removing this, but we have to check that it doesn't change when/how
            # the task boundaries are given to the Method.
            # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.transforms + self.test_transforms,
            starting_step=0,
            max_steps=self.test_max_steps,
            new_random_task_on_reset=self.stationary_context,
        )

    def _check_all_envs_have_same_spaces(
        self,
        envs_or_env_functions: List[Union[str, gym.Env, Callable[[],
                                                                 gym.Env]]],
        wrappers: List[Callable[[gym.Env], gym.Wrapper]],
    ) -> None:
        """ Checks that all the environments in the list have the same
        observation/action spaces.
        """
        if self._using_custom_envs_foreach_task:
            # TODO: Removing this check for now.
            return
        first_env = self._make_env(base_env=envs_or_env_functions[0],
                                   wrappers=wrappers,
                                   **self.base_env_kwargs)
        first_env.close()
        for task_id, task_env_id_or_function in enumerate(
                envs_or_env_functions[1:], start=1):
            task_env = self._make_env(
                base_env=task_env_id_or_function,
                wrappers=wrappers,
                **self.base_env_kwargs,
            )
            task_env.close()
            if task_env.observation_space != first_env.observation_space:
                raise RuntimeError(
                    f"Env at task {task_id} doesn't have the same observation "
                    f"space ({task_env.observation_space}) as the environment of "
                    f"the first task: {first_env.observation_space}.")
            if task_env.action_space != first_env.action_space:
                raise RuntimeError(
                    f"Env at task {task_id} doesn't have the same action "
                    f"space ({task_env.action_space}) as the environment of "
                    f"the first task: {first_env.action_space}")

    def _make_wrappers(
        self,
        base_env: Union[str, gym.Env, Callable[[], gym.Env]],
        task_schedule: Dict[int, Dict],
        # sharp_task_boundaries: bool,
        task_labels_available: bool,
        transforms: List[Transforms],
        starting_step: int,
        max_steps: int,
        new_random_task_on_reset: bool,
    ) -> List[Callable[[gym.Env], gym.Env]]:
        if self._using_custom_envs_foreach_task:
            if task_schedule:
                logger.warning(
                    RuntimeWarning(
                        f"Ignoring task schedule {task_schedule}, since custom envs were "
                        f"passed for each task!"))
            task_schedule = None

        wrappers = super()._make_wrappers(
            base_env=base_env,
            task_schedule=task_schedule,
            task_labels_available=task_labels_available,
            transforms=transforms,
            starting_step=starting_step,
            max_steps=max_steps,
            new_random_task_on_reset=new_random_task_on_reset,
        )

        if self._using_custom_envs_foreach_task:
            # If the user passed a specific env to use for each task, then there won't
            # be a MultiTaskEnv wrapper in `wrappers`, since the task schedule is
            # None/empty.
            # Instead, we will add a Wrapper that always gives the task ID of the
            # current task.

            # TODO: There are some 'unused' args above: `starting_step`, `max_steps`,
            # `new_random_task_on_reset` which are still passed to the super() call, but
            # just unused.
            if new_random_task_on_reset:
                raise NotImplementedError(
                    "TODO: Add a MultiTaskEnv wrapper of some sort that "
                    "alternates between the source envs.")

            assert not task_schedule
            task_label = self.current_task_id
            task_label_space = spaces.Discrete(self.nb_tasks)
            if not task_labels_available:
                task_label = None
                task_label_space = Sparse(task_label_space, sparsity=1.0)

            wrappers.append(
                partial(
                    AddTaskIDWrapper,
                    task_label=task_label,
                    task_label_space=task_label_space,
                ))

        if is_monsterkong_env(base_env):
            # TODO: Need to register a MetaMonsterKong-State-v0 or something like that!
            # TODO: Maybe add another field for 'force_state_observations' ?
            # if self.force_pixel_observations:
            pass

        return wrappers
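
# A minimal usage sketch for the custom-envs-per-task path above (hedged: the
# CartPole ids are placeholders, and `method` stands for any applicable
# Method):
#
#     setting = IncrementalRLSetting(
#         train_envs=["CartPole-v0", "CartPole-v1"],  # one env per task
#     )
#     # `val_envs` / `test_envs` default to copies of `train_envs`, and
#     # `nb_tasks` is set to len(train_envs) (see __post_init__ above).
#     results = setting.apply(method)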
Example #12
class Setting(SettingABC,
              Parseable,
              Serializable,
              LightningDataModule,
              Generic[EnvironmentType],
              metaclass=SettingMeta):
    """ Base class for all research settings in ML: Root node of the tree. 

    A 'setting' is loosely defined here as a learning problem with a specific
    set of assumptions, restrictions, and an evaluation procedure.
    
    For example, Reinforcement Learning is a type of Setting in which we assume
    that an Agent is able to observe an environment, take actions upon it, and 
    receive rewards back from the environment. Some of the assumptions include
    that the reward is dependent on the action taken, and that the actions have
    an impact on the environment's state (and on the next observations the agent
    will receive). The evaluation procedure consists in trying to maximize the
    reward obtained from an environment over a given number of steps.
        
    This 'Setting' class should ideally represent the most general learning
    problem imaginable, with almost no assumptions about the data or evaluation
    procedure.

    This is a dataclass. Its attributes can also be used as command-line
    arguments using `simple_parsing`.
    
    Abstract (required) methods:
    - **apply** Applies a given Method on this setting to produce Results.
    - **prepare_data** (things to do on 1 GPU/TPU not on every GPU/TPU in distributed mode).
    - **setup**  (things to do on every accelerator in distributed mode).
    - **train_dataloader** the training environment/dataloader.
    - **val_dataloader** the val environments/dataloader(s).
    - **test_dataloader** the test environments/dataloader(s).

    "Abstract"-ish (required) class attributes:
    - `Results`: The class of Results that are created when applying a Method on
      this setting.
    - `Observations`: The type of Observations that will be produced in this
      setting.
    - `Actions`: The type of Actions that are expected from this setting.
    - `Rewards`: The type of Rewards that this setting will (potentially) return
      upon receiving an action from the method.
    """
    ## ---------- Class Variables ------------- 
    ## Fields in this block are class attributes. They don't create command-line
    ## arguments.
    
    # Type of Observations that the dataloaders (a.k.a. "environments") will
    # produce for this type of Setting.
    Observations: ClassVar[Type[Observations]] = Observations
    # Type of Actions that the dataloaders (a.k.a. "environments") will receive
    # through their `send` method, for this type of Setting.
    Actions: ClassVar[Type[Actions]] = Actions
    # Type of Rewards that the dataloaders (a.k.a. "environments") will return
    # after receiving an action, for this type of Setting.
    Rewards: ClassVar[Type[Rewards]] = Rewards
    
    # The type of Results that are given back when a method is applied on this
    # Setting. The `Results` class basically defines the 'evaluation metric' for
    # a given type of setting. See the `Results` class for more info.
    Results: ClassVar[Type[Results]] = Results
    
    available_datasets: ClassVar[Dict[str, Any]] = {}
    
    ##
    ##   -------------
    # Transforms to be applied to the observations of the train/valid/test
    # environments.
    transforms: List[Transforms] = list_field()

    # Transforms to be applied to the training datasets.
    train_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)
    # Transforms to be applied to the validation datasets. 
    val_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)
    # Transforms to be applied to the testing datasets.
    test_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)

    # Fraction of training data to use to create the validation set.
    # (Only applicable in Passive settings.)
    val_fraction: float = 0.2

    # TODO: Still not sure where exactly we should be adding the 'batch_size'
    # and 'num_workers' arguments. Adding it here for now with cmd=False, so
    # that they can be passed to the constructor of the Setting.
    batch_size: Optional[int] = field(default=None, cmd=False)
    num_workers: Optional[int] = field(default=None, cmd=False)
    
    # # TODO: Add support for semi-supervised training.
    # # Fraction of the dataset that is labeled.
    # labeled_data_fraction: int = 1.0
    # # Number of labeled examples.
    # n_labeled_examples: Optional[int] = None

    def __post_init__(self,
                      observation_space: gym.Space = None,
                      action_space: gym.Space = None,
                      reward_space: gym.Space = None):
        """ Initializes the fields of the setting that weren't set from the
        command-line.
        """
        logger.debug(f"__post_init__ of Setting")
        if len(self.train_transforms) == 1 and isinstance(self.train_transforms[0], list):
            self.train_transforms = self.train_transforms[0]
        if len(self.val_transforms) == 1 and isinstance(self.val_transforms[0], list):
            self.val_transforms = self.val_transforms[0]
        if len(self.test_transforms) == 1 and isinstance(self.test_transforms[0], list):
            self.test_transforms = self.test_transforms[0]

        # Actually compose the list of Transforms or callables into a single transform.
        self.train_transforms: Compose = Compose(self.train_transforms)
        self.val_transforms: Compose = Compose(self.val_transforms)
        self.test_transforms: Compose = Compose(self.test_transforms)

        LightningDataModule.__init__(self,
            train_transforms=self.train_transforms,
            val_transforms=self.val_transforms,
            test_transforms=self.test_transforms,
        )
        
        self._observation_space = observation_space
        self._action_space = action_space
        self._reward_space = reward_space

        # TODO: It's a bit confusing to also have a `config` attribute on the
        # Setting. Might want to change this a bit.
        self.config: Config = None

        self.train_env: Environment = None  # type: ignore
        self.val_env: Environment = None  # type: ignore
        self.test_env: Environment = None  # type: ignore
    

    @abstractmethod
    def apply(self, method: Method, config: Config = None) -> "Setting.Results":
        # NOTE: The actual train/test loop should be defined in a more specific
        # setting. This is just here as an illustration of what that could look
        # like. 
        assert False, "this is just here for illustration purposes. "
        
        method.fit(
            train_env=self.train_dataloader(),
            valid_env=self.val_dataloader(),
        )

        # Test loop:
        test_env = self.test_dataloader()
        test_metrics = []
        # Number of episodes to test on:
        n_test_episodes = 1

        # Perform a set number of episodes in the test environment.
        for episode in range(n_test_episodes):
            # Get initial observations.
            observations = test_env.reset()
            
            for i in itertools.count():
                # Get the predictions/actions for a batch of observations.
                actions = method.get_actions(observations, test_env.action_space)
                observations, rewards, done, info = test_env.step(actions)
                # Calculate the 'metrics' (TODO: This should be done in the env!)
                batch_metrics = self.get_metrics(actions=actions, rewards=rewards)
                test_metrics.append(batch_metrics)
                if done:
                    break

        return self.Results(test_metrics=test_metrics)

    def get_metrics(self,
                    actions: Actions,
                    rewards: Rewards) -> Union[float, Metrics]:
        """ Calculate the "metric" from the model predictions (actions) and the true labels (rewards).
        
        In this example, we return a 'Metrics' object:
        - `ClassificationMetrics` for classification problems,
        - `RegressionMetrics` for regression problems.
        
        We use these objects because they are awesome (they basically simplify
        making plots, wandb logging, and serialization), but you can also just
        return floats if you want, no problem.
        
        TODO: This is duplicated from Incremental. Need to fix this.
        """
        from sequoia.common.metrics import get_metrics
        # In this particular setting, we only use the y_pred from actions and
        # the y from the rewards.
        if isinstance(actions, Actions):
            actions = torch.as_tensor(actions.y_pred)
        if isinstance(rewards, Rewards):
            rewards = torch.as_tensor(rewards.y)
        # TODO: At the moment there's this problem: ClassificationMetrics wants
        # to create a confusion matrix, which requires 'logits' (so that it
        # knows how many classes there are).
        if isinstance(actions, Tensor):
            actions = actions.cpu().numpy()
        if isinstance(rewards, Tensor):
            rewards = rewards.cpu().numpy()
        
        # assert actions in self.action_space, f"Invalid actions {actions} (space = {self.action_space})"
        # assert rewards in self.reward_space, f"Invalid rewards? {rewards} (space = {self.reward_space})"
        
        if isinstance(self.action_space, spaces.Discrete):
            batch_size = rewards.shape[0]
            actions = torch.as_tensor(actions)
            if len(actions.shape) == 1 or (actions.shape[-1] == 1 and self.action_space.n != 2):
                fake_logits = torch.zeros([batch_size, self.action_space.n], dtype=int)
                # FIXME: There must be a smarter way to do this indexing.
                for i, action in enumerate(actions):
                    fake_logits[i, action] = 1
                actions = fake_logits
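                # NOTE: (untested suggestion) torch.nn.functional.one_hot could
                # build these "fake logits" in one call, e.g.:
                #     actions = torch.nn.functional.one_hot(
                #         actions.flatten().long(),
                #         num_classes=self.action_space.n)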

        return get_metrics(y_pred=actions, y=rewards)
    
    @property
    def image_space(self) -> Optional[gym.Space]:
        if isinstance(self.observation_space, spaces.Box):
            return self.observation_space
        if isinstance(self.observation_space, spaces.Tuple):
            assert isinstance(self.observation_space[0], spaces.Box)
            return self.observation_space[0]
        if isinstance(self.observation_space, spaces.Dict):
            return self.observation_space.spaces["x"]
        logger.warning(f"Don't know what the image space is. "
                       f"(self.observation_space={self.observation_space})")
        return None

    @property
    def observation_space(self) -> gym.Space:
        return self._observation_space

    @observation_space.setter
    def observation_space(self, value: gym.Space) -> None:
        """Sets a the observation space.
        
        NOTE: This also changes the value of the `dims` attribute and the result
        of the `size()` method from LightningDataModule.
        """
        if not isinstance(value, gym.Space):
            raise RuntimeError(f"Value must be a `gym.Space` (got {value})")
        if not self._dims:
            if isinstance(value, spaces.Box):
                self.dims = value.shape
            elif isinstance(value, spaces.Tuple):
                self.dims = tuple(space.shape for space in value.spaces)
            elif isinstance(value, spaces.Dict) and "x" in value.spaces:
                self.dims = value.spaces["x"].shape
            else:
                raise NotImplementedError(
                    f"Don't know how to set the 'dims' attribute using "
                    f"observation space {value}"
                )
        self._observation_space = value

    @property
    def action_space(self) -> gym.Space:
        return self._action_space

    @action_space.setter
    def action_space(self, value: gym.Space) -> None:
        self._action_space = value

    @property
    def reward_space(self) -> gym.Space:
        return self._reward_space

    @reward_space.setter
    def reward_space(self, value: gym.Space) -> None:
        self._reward_space = value
    
    @classmethod
    def get_available_datasets(cls) -> Iterable[str]:
        """ Returns an iterable of strings which represent the names of datasets. """
        return cls.available_datasets

    @classmethod
    def main(cls, argv: Optional[Union[str, List[str]]] = None) -> Results:
        from sequoia.main import Experiment
        experiment: Experiment
        # Create the Setting object from the command-line:
        setting = cls.from_args(argv)
        # Then create the 'Experiment' from the command-line, which makes it
        # possible to choose between all the methods.
        experiment = Experiment.from_args(argv)
        # fix the setting attribute to be the one parsed above.
        experiment.setting = setting
        results: ResultsType = experiment.launch(argv)
        return results

    def apply_all(self, argv: Union[str, List[str]] = None) -> Dict[Type["Method"], Results]:
        applicable_methods = self.get_applicable_methods()
        from sequoia.methods import Method
        all_results: Dict[Type[Method], Results] = {}
        config = Config.from_args(argv)
        for method_type in applicable_methods:
            method = method_type.from_args(argv)
            results = self.apply(method, config)
            all_results[method_type] = results
        logger.info(f"All results for setting of type {type(self)}:")
        logger.info({
            method.get_name(): (results.get_metric() if results else "crashed")
            for method, results in all_results.items()
        })
        return all_results

    @classmethod
    def get_path_to_source_file(cls: Type) -> Path:
        from sequoia.utils.utils import get_path_to_source_file
        return get_path_to_source_file(cls)

    def _check_environments(self):
        """ Do a quick check to make sure that interacting with the envs/dataloaders
        works correctly.
        """
        from sequoia.settings.passive import PassiveEnvironment
        from sequoia.settings.active import ActiveEnvironment
        
        # Check that the env's spaces are batched versions of the settings'.
        from gym.vector.utils import batch_space

        batch_size = self.batch_size
        for loader_method in [self.train_dataloader, self.val_dataloader, self.test_dataloader]:
            print(f"\n\nChecking loader method {loader_method.__name__}\n\n")
            env = loader_method(batch_size=batch_size)
            
            batch_size = env.batch_size

            # We could compare the spaces directly, but that's a bit messy, and
            # would depend on the exact type of each space. Instead, we check
            # how samples from these spaces are affected by batching.
            if batch_size:
                expected_observation_space = batch_space(self.observation_space, n=batch_size)
                expected_action_space = batch_space(self.action_space, n=batch_size)
                expected_reward_space = batch_space(self.reward_space, n=batch_size)
            else:
                expected_observation_space = self.observation_space
                expected_action_space = self.action_space
                expected_reward_space = self.reward_space

            # TODO: Batching the 'Sparse' makes it really ugly, so just
            # comparing the 'image' portion of the space for now.
            assert env.observation_space[0].shape == expected_observation_space[0].shape, (env.observation_space[0], expected_observation_space[0])
            # assert env.observation_space[0] == expected_observation_space[0], (env.observation_space[0], expected_observation_space[0])
            # assert env.observation_space[1] == expected_observation_space[1], (
            #     f"env obs space: {env.observation_space[1]}, \n"
            #     f"expected obs space: {expected_observation_space[1]}"
            # )

            assert env.action_space == expected_action_space, (env.action_space, expected_action_space)
            assert env.reward_space == expected_reward_space, (env.reward_space, expected_reward_space)

            # Check that the 'gym API' interaction is working correctly.
            reset_obs: Observations = env.reset()
            self._check_observations(env, reset_obs)

            for i in range(5):
                actions = env.action_space.sample()
                self._check_actions(env, actions)
                step_observations, step_rewards, done, info = env.step(actions)
                self._check_observations(env, step_observations)
                self._check_rewards(env, step_rewards)
                if batch_size:
                    assert not any(done)
                else:
                    assert not done
                # assert not (done if isinstance(done, bool) else any(done))

            for batch in take(env, 5):
                observations: Observations
                rewards: Optional[Rewards]
                
                if isinstance(env, PassiveEnvironment):
                    observations, rewards = batch
                else:
                    # In RL, at the moment, the 'dataset' gives back only the
                    # observations.
                    observations, rewards = batch, None

                self._check_observations(env, observations)
                if rewards is not None:
                    self._check_rewards(env, rewards)
                
                if batch_size:
                    actions = tuple(
                        self.action_space.sample() for _ in range(batch_size)
                    )
                else:
                    actions = self.action_space.sample()
                # actions = self.Actions(torch.as_tensor(actions))
                rewards = env.send(actions)
                self._check_rewards(env, rewards)

            env.close()
    
    def _check_observations(self, env: Environment, observations: Any):
        """ Check that the given observation makes sense for the given environment.
        
        TODO: This should probably not be in this file here. It's more used for
        testing than anything else.
        """
        assert isinstance(observations, self.Observations), observations
        images = observations.x
        assert isinstance(images, (torch.Tensor, np.ndarray))
        if isinstance(images, Tensor):
            images = images.cpu().numpy()
        
        # Find the 'image' space:
        if isinstance(env.observation_space, spaces.Box):
            image_space = env.observation_space
        elif isinstance(env.observation_space, spaces.Tuple):
            image_space = env.observation_space[0]
        else:
            raise RuntimeError(f"Don't know how to find the image space in the "
                               f"env's obs space ({env.observation_space}).")
        assert images in image_space
    
    def _check_actions(self, env: Environment, actions: Any):
        if isinstance(actions, Actions):
            assert isinstance(actions, self.Actions)
            actions = actions.y_pred.cpu().numpy()
        elif isinstance(actions, Tensor):
            actions = actions.cpu().numpy()
        elif isinstance(actions, np.ndarray):
            pass  # Already a numpy array.
        assert actions in env.action_space
    
    def _check_rewards(self, env: Environment, rewards: Any):
        if isinstance(rewards, Rewards):
            assert isinstance(rewards, self.Rewards)
            rewards = rewards.y
        if isinstance(rewards, Tensor):
            rewards = rewards.cpu().numpy()
        if isinstance(rewards, np.ndarray):
            pass  # Already a numpy array.
        if isinstance(rewards, (int, float)):
            rewards = np.asarray(rewards)
        assert rewards in env.reward_space, (rewards, env.reward_space)

    # Just to make type hinters stop throwing errors when using the constructor
    # to create a Setting.
    def __new__(cls, *args, **kwargs):
        return super().__new__(cls, *args, **kwargs)

    
    @classmethod
    def load_benchmark(cls: Type[SettingType], benchmark: Union[str, Path]) -> SettingType:
        """ Load the given "benchmark" (pre-configured Setting) of this type. 

        Parameters
        ----------
        cls : Type[SettingType]
            Type of Setting to create.
        benchmark : Union[str, Path]
            Either the name of a benchmark (e.g. "cartpole_state", "monsterkong", etc.)
            or a path to a json/yaml file.

        Returns
        -------
        SettingType
            Setting of type `cls`, appropriately populated according to the chosen
            benchmark.

        Raises
        ------
        RuntimeError
            If `benchmark` isn't an existing file or a known preset.
        RuntimeError
            If any command-line arguments are present in sys.argv which would be ignored
            when creating this setting. 
        """
        # If the provided benchmark isn't a path, try to get the value from
        # the `setting_presets` dict. If it isn't in the dict, raise an
        # error.
        if not Path(benchmark).is_file():
            if benchmark in setting_presets:
                benchmark = setting_presets[benchmark]
            else:
                raise RuntimeError(
                    f"Could not find benchmark '{benchmark}': it "
                    f"is neither a path to a file nor a key of the "
                    f"`setting_presets` dictionary. \n"
                    f"(Available presets: {setting_presets}) "
                )
        # Creating an experiment for the given setting, loaded from the
        # config file.
        # TODO: IDEA: Do the same thing for loading the Method?
        logger.info(f"Will load the options for setting {cls} from the file "
                    f"at path {benchmark}.")

        # Raise an error if any of the args in sys.argv would have been used
        # up by the Setting, just to prevent any ambiguities.
        _, unused_args = cls.from_known_args()
        consumed_args = list(set(sys.argv[1:]) - set(unused_args))
        if consumed_args:
            # TODO: This could also be triggered if the method had arguments
            # with the same name as some of the Setting's.
            raise RuntimeError(
                f"Cannot pass command-line arguments for the Setting when "
                f"loading a benchmark, since these arguments would have been "
                f"ignored when creating the setting of type {cls} "
                f"anyway: {consumed_args}"
            )

        drop_extras = False
        # Actually load the setting from the file.
        setting = cls.load(path=benchmark, drop_extra_fields=drop_extras)
        return setting
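
# A minimal usage sketch for `load_benchmark` (hedged: "cartpole_state" is one
# of the preset names mentioned in the docstring above, `SomeSetting` stands
# for any concrete Setting subclass, and `method` for any applicable Method):
#
#     setting = SomeSetting.load_benchmark("cartpole_state")
#     results = setting.apply(method)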
Example #13
class MultiTaskSetting(TaskIncrementalSetting):
    """IID version of the Task-Incremental Setting, where the data is shuffled.
    
    Can be used to estimate the upper bound performance of Task-Incremental CL Methods.
    """

    # Number of tasks.
    nb_tasks: int = 0

    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes.
    increment: Union[int, List[int]] = list_field(2,
                                                  type=int,
                                                  nargs="*",
                                                  alias="n_classes_per_task")
    # A different task size applied only for the first task.
    # Deactivated if `increment` is a list.
    initial_increment: int = 0
    # An optional custom class order, used for NC.
    class_order: Optional[List[int]] = constant(None)
    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes (defaults to the value of
    # `increment`).
    test_increment: Optional[Union[List[int], int]] = constant(None)
    # A different task size applied only for the first test task.
    # Deactivated if `test_increment` is a list. Defaults to the
    # value of `initial_increment`.
    test_initial_increment: Optional[int] = constant(None)
    # An optional custom class order for testing, used for NC.
    # Defaults to the value of `class_order`.
    test_class_order: Optional[List[int]] = constant(None)

    def __post_init__(self):
        super().__post_init__()
        # IDEA: We could reuse the training loop from Incremental, if we modify it so it
        # discriminates between "phases" and "tasks".

    @property
    def phases(self) -> int:
        return 1

    def setup(self, stage=None, *args, **kwargs):
        super().setup(stage=stage, *args, **kwargs)

    def get_train_dataset(self) -> Dataset:
        """ Returns the training dataset, which in this case will be shuffled.

        IDEA: We could probably do it the same way in both RL and SL:
        1. Create the 'datasets' for all the tasks;
        2. "concatenate"+"Shuffle" the "datasets":
            - in SL: ConcatDataset / shuffle the datasets
            - in RL: Create a true `MultiTaskEnvironment` that accepts a list of envs as
              an input and alternates between environments at each episode.
              (either round-robin style, or randomly)

        Returns
        -------
        Dataset
        """
        joined_dataset = ConcatDataset(self.train_datasets)
        return shuffle(joined_dataset, rng=np.random)

    def get_val_dataset(self) -> Dataset:
        return ConcatDataset(self.val_datasets)

    def get_test_dataset(self) -> Dataset:
        return ConcatDataset(self.test_datasets)

    def train_dataloader(self,
                         batch_size: int = None,
                         num_workers: int = None) -> PassiveEnvironment:
        """Returns a DataLoader for the training dataset.

        This dataloader will yield batches which will very likely contain data from
        multiple different tasks, and will contain task labels.

        Parameters
        ----------
        batch_size : int, optional
            Batch size to use. Defaults to None, in which case the value of
            `self.batch_size` is used.
        num_workers : int, optional
            Number of workers to use. Defaults to None, in which case the value of
            `self.num_workers` is used.

        Returns
        -------
        PassiveEnvironment
            A "Passive" Dataloader/gym.Env. 
        """
        return super().train_dataloader(batch_size=batch_size,
                                        num_workers=num_workers)

    def val_dataloader(self,
                       batch_size: int = None,
                       num_workers: int = None) -> PassiveEnvironment:
        """Returns a DataLoader for the validation dataset.

        This dataloader will yield batches which will very likely contain data from
        multiple different tasks, and will contain task labels.

        Parameters
        ----------
        batch_size : int, optional
            Batch size to use. Defaults to None, in which case the value of
            `self.batch_size` is used.
        num_workers : int, optional
            Number of workers to use. Defaults to None, in which case the value of
            `self.num_workers` is used.

        Returns
        -------
        PassiveEnvironment
            A "Passive" Dataloader/gym.Env. 
        """
        return super().val_dataloader(batch_size=batch_size,
                                      num_workers=num_workers)

    def test_dataloader(self,
                        batch_size: int = None,
                        num_workers: int = None) -> PassiveEnvironment:
        """Returns a DataLoader for the test dataset.

        This dataloader will yield batches which will very likely contain data from
        multiple different tasks, and will contain task labels.

        Unlike the train and validation environments, the test environment will not
        yield rewards until the action has been sent to it using either `send` (when
        iterating in the DataLoader-style) or `step` (when interacting with the
        environment in the gym.Env style). For more info, take a look at the
        `PassiveEnvironment` class.
        
        Parameters
        ----------
        batch_size : int, optional
            Batch size to use. Defaults to None, in which case the value of
            `self.batch_size` is used.
        num_workers : int, optional
            Number of workers to use. Defaults to None, in which case the value of
            `self.num_workers` is used.

        Returns
        -------
        PassiveEnvironment
            A "Passive" Dataloader/gym.Env. 
        """
        return super().test_dataloader(batch_size=batch_size,
                                       num_workers=num_workers)

    def test_loop(self, method: Method) -> "IncrementalSetting.Results":
        """ Runs a multi-task test loop and returns the Results.
        """
        test_env = self.test_dataloader()
        try:
            # If the Method has `test` defined, use it.
            method.test(test_env)
            test_env.close()
            # Get the metrics from the test environment
            test_results: Results = test_env.get_results()
            print(f"Test results: {test_results}")
            return test_results

        except NotImplementedError:
            logger.info(f"Will query the method for actions at each step, "
                        f"since it doesn't implement a `test` method.")

        obs = test_env.reset()

        # TODO: Do we always have a maximum number of steps? or of episodes?
        # Will it work the same for Supervised and Reinforcement learning?
        max_steps: Optional[int] = getattr(test_env, "step_limit", None)

        # Reset on the last step is causing trouble, since the env is closed.
        pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
        episode = 0
        for step in pbar:
            if test_env.is_closed():
                logger.debug(f"Env is closed")
                break
            # logger.debug(f"At step {step}")
            action = method.get_actions(obs, test_env.action_space)

            # logger.debug(f"action: {action}")
            # TODO: Remove this:
            if isinstance(action, Actions):
                action = action.y_pred
            if isinstance(action, Tensor):
                action = action.cpu().numpy()

            obs, reward, done, info = test_env.step(action)

            if done and not test_env.is_closed():
                # logger.debug(f"end of test episode {episode}")
                obs = test_env.reset()
                episode += 1

        test_env.close()
        test_results = test_env.get_results()

        return test_results
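
# A minimal usage sketch (hedged: "mnist" and the keyword arguments are
# assumptions based on the defaults shown elsewhere in this document):
#
#     setting = MultiTaskSetting(dataset="mnist", nb_tasks=5)
#     env = setting.train_dataloader(batch_size=32)
#     for observations, rewards in env:
#         ...  # batches mix data from all tasks and include task labels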