def nc_benchmark(
        train_dataset: Union[
            Sequence[SupportedDataset], SupportedDataset],
        test_dataset: Union[
            Sequence[SupportedDataset], SupportedDataset],
        n_experiences: int,
        task_labels: bool,
        *,
        shuffle: bool = True,
        seed: Optional[int] = None,
        fixed_class_order: Optional[Sequence[int]] = None,
        per_exp_classes: Optional[Dict[int, int]] = None,
        class_ids_from_zero_from_first_exp: bool = False,
        class_ids_from_zero_in_each_exp: bool = False,
        one_dataset_per_exp: bool = False,
        train_transform=None,
        eval_transform=None,
        reproducibility_data: Optional[Dict[str, Any]] = None) -> NCScenario:
    """
    This is the high-level benchmark instances generator for the
    "New Classes" (NC) case. Given a sequence of train and test datasets creates
    the continual stream of data as a series of experiences. Each experience
    will contain all the instances belonging to a certain set of classes and a
    class won't be assigned to more than one experience.

    This is the reference helper function for creating instances of Class- or
    Task-Incremental benchmarks.

    The ``task_labels`` parameter determines if each incremental experience has
    an increasing task label or if, at the contrary, a default task label "0"
    has to be assigned to all experiences. This can be useful when
    differentiating between Single-Incremental-Task and Multi-Task scenarios.

    There are other important parameters that can be specified in order to tweak
    the behaviour of the resulting scenario. Please take a few minutes to read
    and understand them as they may save you a lot of work.

    This generator features an integrated reproducibility mechanism that allows
    the user to store and later re-load a scenario. For more info see the
    ``reproducibility_data`` parameter.

    :param train_dataset: A list of training datasets, or a single dataset.
    :param test_dataset: A list of test datasets, or a single test dataset.
    :param n_experiences: The number of incremental experience. This is not used
        when using multiple train/test datasets with the ``one_dataset_per_exp``
        parameter set to True.
    :param task_labels: If True, each experience will have an ascending task
            label. If False, the task label will be 0 for all the experiences.
    :param shuffle: If True, the class (or experience) order will be shuffled.
        Defaults to True.
    :param seed: If ``shuffle`` is True and seed is not None, the class (or
        experience) order will be shuffled according to the seed. When None, the
        current PyTorch random number generator state will be used. Defaults to
        None.
    :param fixed_class_order: If not None, the class order to use (overrides
        the shuffle argument). Very useful for enhancing reproducibility.
        Defaults to None.
    :param per_exp_classes: If not None, a dictionary whose keys are
        (0-indexed) experience IDs and their values are the number of classes
        to include in the respective experiences. The dictionary doesn't
        have to contain a key for each experience! All the remaining experiences
        will contain an equal amount of the remaining classes. The
        remaining number of classes must be divisible without remainder
        by the remaining number of experiences. For instance,
        if you want to include 50 classes in the first experience
        while equally distributing remaining classes across remaining
        experiences, just pass the "{0: 50}" dictionary as the
        ``per_exp_classes`` parameter. Defaults to None.
    :param class_ids_from_zero_from_first_exp: If True, original class IDs
        will be remapped so that they will appear as having an ascending
        order. For instance, if the resulting class order after shuffling
        (or defined by fixed_class_order) is [23, 34, 11, 7, 6, ...] and
        class_ids_from_zero_from_first_exp is True, then all the patterns
        belonging to class 23 will appear as belonging to class "0",
        class "34" will be mapped to "1", class "11" to "2" and so on.
        This is very useful when drawing confusion matrices and when dealing
        with algorithms with dynamic head expansion. Defaults to False.
        Mutually exclusive with the ``class_ids_from_zero_in_each_exp``
        parameter.
    :param class_ids_from_zero_in_each_exp: If True, original class IDs
        will be mapped to range [0, n_classes_in_exp) for each experience.
        Defaults to False. Mutually exclusive with the
        ``class_ids_from_zero_from_first_exp`` parameter.
    :param one_dataset_per_exp: available only when multiple train-test
        datasets are provided (the flag is only read when ``train_dataset``
        is a list or tuple). If True, each dataset will be treated as an
        experience. Mutually exclusive with the ``per_exp_classes`` and
        ``fixed_class_order`` parameters. Overrides the ``n_experiences``
        parameter. Defaults to False.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param reproducibility_data: If not None, overrides all the other
        scenario definition options. This is usually a dictionary containing
        data used to reproduce a specific experiment. One can use the
        ``get_reproducibility_data`` method to get (and even distribute)
        the experiment setup so that it can be loaded by passing it as this
        parameter. In this way one can be sure that the same specific
        experimental setup is being used (for reproducibility purposes).
        Beware that, in order to reproduce an experiment, the same train and
        test datasets must be used. Defaults to None.

    :raises ValueError: If both ``class_ids_from_zero_*`` flags are set, if
        the train and test dataset lists have different lengths, or if
        ``one_dataset_per_exp`` is combined with ``per_exp_classes`` or
        ``fixed_class_order``.

    :return: A properly initialized :class:`NCScenario` instance.
    """

    if class_ids_from_zero_from_first_exp and class_ids_from_zero_in_each_exp:
        raise ValueError('Invalid mutually exclusive options '
                         'class_ids_from_zero_from_first_exp and '
                         'class_ids_from_zero_in_each_exp set at the '
                         'same time')

    if isinstance(train_dataset, (list, tuple)):
        # Multi-dataset setting

        if len(train_dataset) != len(test_dataset):
            raise ValueError('Train/test dataset lists must contain the '
                             'exact same number of datasets')

        if per_exp_classes and one_dataset_per_exp:
            raise ValueError(
                'Both per_exp_classes and one_dataset_per_exp are '
                'used, but those options are mutually exclusive')

        if fixed_class_order and one_dataset_per_exp:
            raise ValueError(
                'Both fixed_class_order and one_dataset_per_exp are '
                'used, but those options are mutually exclusive')

        seq_train_dataset, seq_test_dataset, mapping = \
            concat_datasets_sequentially(train_dataset, test_dataset)

        if one_dataset_per_exp:
            # If one_dataset_per_exp is True, each dataset will be treated as
            # an experience. In this scenario, shuffle refers to the
            # experience order, not to the class one.
            fixed_class_order, per_exp_classes = \
                _one_dataset_per_exp_class_order(mapping, shuffle, seed)

            # We pass a fixed_class_order to the NCScenario constructor,
            # so we don't need shuffling.
            shuffle = False
            seed = None

            # Overrides n_experiences (per_exp_classes was already replaced
            # above). Must be computed before train_dataset is rebound to the
            # concatenated dataset below.
            n_experiences = len(train_dataset)
        train_dataset, test_dataset = seq_train_dataset, seq_test_dataset

    transform_groups = dict(
        train=(train_transform, None),
        eval=(eval_transform, None)
    )

    # Datasets should be instances of AvalancheDataset
    train_dataset = AvalancheDataset(
        train_dataset,
        transform_groups=transform_groups,
        initial_transform_group='train',
        dataset_type=AvalancheDatasetType.CLASSIFICATION)

    test_dataset = AvalancheDataset(
        test_dataset,
        transform_groups=transform_groups,
        initial_transform_group='eval',
        dataset_type=AvalancheDatasetType.CLASSIFICATION)

    return NCScenario(train_dataset, test_dataset, n_experiences, task_labels,
                      shuffle, seed, fixed_class_order, per_exp_classes,
                      class_ids_from_zero_from_first_exp,
                      class_ids_from_zero_in_each_exp,
                      reproducibility_data)
Beispiel #2
0
def EndlessCLSim(
    *,
    scenario: str = _scenario_names[0],
    patch_size: int = 64,
    sequence_order: Optional[List[int]] = None,
    task_order: Optional[List[int]] = None,
    train_transform: Optional[Any] = _default_transform,
    eval_transform: Optional[Any] = _default_transform,
    dataset_root: Optional[Union[str, Path]] = None,
    semseg: bool = False
):
    """
    Creates a CL scenario for the Endless-Continual-Learning Simulator's
    derived datasets, which are available at:
    https://zenodo.org/record/4899267, or custom datasets created from
    the Endless-Continual-Learning-Simulator's standalone application,
    available at: https://zenodo.org/record/4899294.
    Both are part of the publication of "A Procedural World Generation
    Framework for Systematic Evaluation of Continual Learning"
    (https://arxiv.org/abs/2106.02585).

    If the dataset is not present in the computer, this method will
    automatically download and store it.

    All generated scenarios make use of 'task labels'. We regard a full dataset
    as one learning 'sequence', aligned to the terminology in the above paper,
    with 'subsequences' being the iterative learning tasks. Each subsequence
    is realized as one `AvalancheDataset` with ordering enforced by task
    labels.

    :param scenario: Available, predefined, learning scenarios are:
        'Classes': A learning scenario based on incremental availability of
        object class examples,
        'Illumination': A learning scenario based on iteratively decreasing
        scene illumination.
        'Weather': A learning scenario based on iteratively shifting weather
        conditions.
    :param patch_size: The dimension of the image-patches. Int in the case of
            image-patch classification, because the image-patches need to be
            quadratic. Tuple of integers for image segmentation tasks.
            NOTE: this value is not read anywhere in this function body.
    :param sequence_order: List of integers indexing the subsequences,
            enables reordering of the subsequences, especially subsequences can
            be omitted. Defaults to None, loading subsequences in their
            original order.
    :param task_order: List of integers, assigning task labels to each
            respective subsequence, i.e. ``task_order[i]`` is the task label
            of the subsequence ``sequence_order[i]``. Defaults to None, which
            assigns ascending labels ``0, 1, ...`` to the selected
            subsequences.
    :param train_transform: The transformation to apply to the training data.
            Defaults to `_default_transform`, i.e. conversion ToTensor of
            torchvision.
    :param eval_transform: The transformation to apply to the eval data.
            Defaults to `_default_transform`, i.e. conversion ToTensor of
            torchvision.
    :param dataset_root: Absolute path indicating where to store the dataset.
            Defaults to None, which means the default location for
            'endless-cl-sim' will be used.
    :param semseg: boolean to indicate the use of targets for a semantic
            segmentation task. Defaults to False.

    :returns: The benchmark object produced by ``dataset_benchmark`` from the
            per-subsequence train and eval datasets.
    """
    # Check scenario name is valid
    assert scenario in _scenario_names, (
        "The selected scenario is not "
        "recognized: it should be "
        "'Classes', 'Illumination', "
        "or 'Weather'."
    )

    # Assign default dataset root if None provided
    if dataset_root is None:
        dataset_root = default_dataset_location("endless-cl-sim")

    # Download and prepare the dataset. Transforms are attached later, per
    # subsequence, so none is passed here.
    endless_cl_sim_dataset = EndlessCLSimDataset(
        root=dataset_root,
        scenario=scenario,
        transform=None,
        download=True,
        semseg=semseg,
    )

    # Default sequence_order if None: every subsequence, in original order
    if sequence_order is None:
        sequence_order = list(range(len(endless_cl_sim_dataset)))

    # Default task_order if None: one ascending task label per *selected*
    # subsequence. Sizing it by sequence_order (rather than by the dataset)
    # keeps the default valid when subsequences are repeated or omitted.
    if task_order is None:
        task_order = list(range(len(sequence_order)))

    train_datasets = []
    eval_datasets = []
    for i, subsequence_idx in enumerate(sequence_order):
        train_data, eval_data = endless_cl_sim_dataset[subsequence_idx]

        train_data.transform = train_transform
        eval_data.transform = eval_transform

        train_datasets.append(
            AvalancheDataset(dataset=train_data, task_labels=task_order[i])
        )
        eval_datasets.append(
            AvalancheDataset(dataset=eval_data, task_labels=task_order[i])
        )

    scenario_obj = dataset_benchmark(train_datasets, eval_datasets)

    return scenario_obj
def ni_benchmark(
        train_dataset: Union[
            Sequence[SupportedDataset], SupportedDataset],
        test_dataset: Union[
            Sequence[SupportedDataset], SupportedDataset],
        n_experiences: int,
        *,
        task_labels: bool = False,
        shuffle: bool = True,
        seed: Optional[int] = None,
        balance_experiences: bool = False,
        min_class_patterns_in_exp: int = 0,
        fixed_exp_assignment: Optional[Sequence[Sequence[int]]] = None,
        train_transform=None,
        eval_transform=None,
        reproducibility_data: Optional[Dict[str, Any]] = None) \
        -> NIScenario:
    """
    High-level generator of "New Instances" (NI) benchmark instances.

    Starting from one or more train/test datasets, builds a continual stream
    of data split into experiences. This is the reference helper for creating
    Domain-Incremental benchmarks.

    When lists (or tuples) of datasets are given, they are first merged with
    ``concat_datasets_sequentially``. The resulting train and test datasets
    are then wrapped as classification :class:`AvalancheDataset` instances
    carrying a 'train' and an 'eval' transform group, and handed over to
    :class:`NIScenario` together with the remaining options.

    ``task_labels`` selects between an increasing task label per experience
    and a single default task label "0" shared by all experiences, which is
    useful to tell Single-Incremental-Task and Multi-Task scenarios apart.

    The generator also has an integrated reproducibility mechanism to store
    and later re-load a scenario: see ``reproducibility_data``.

    :param train_dataset: A list of training datasets, or a single dataset.
    :param test_dataset: A list of test datasets, or a single test dataset.
    :param n_experiences: The number of experiences.
    :param task_labels: If True, each experience will have an ascending task
        label. If False, the task label will be 0 for all the experiences.
    :param shuffle: If True, patterns order will be shuffled.
    :param seed: A valid int used to initialize the random number generator.
        Can be None.
    :param balance_experiences: If True, pattern of each class will be equally
        spread across all experiences. If False, patterns will be assigned to
        experiences in a complete random way. Defaults to False.
    :param min_class_patterns_in_exp: The minimum amount of patterns of
        every class that must be assigned to every experience. Compatible with
        the ``balance_experiences`` parameter. An exception will be raised if
        this constraint can't be satisfied. Defaults to 0.
    :param fixed_exp_assignment: If not None, the pattern assignment
        to use. It must be a list with an entry for each experience. Each entry
        is a list that contains the indexes of patterns belonging to that
        experience. Overrides the ``shuffle``, ``balance_experiences`` and
        ``min_class_patterns_in_exp`` parameters.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param reproducibility_data: If not None, overrides all the other
        scenario definition options, including ``fixed_exp_assignment``.
        This is usually a dictionary containing data used to
        reproduce a specific experiment. One can use the
        ``get_reproducibility_data`` method to get (and even distribute)
        the experiment setup so that it can be loaded by passing it as this
        parameter. In this way one can be sure that the same specific
        experimental setup is being used (for reproducibility purposes).
        Beware that, in order to reproduce an experiment, the same train and
        test datasets must be used. Defaults to None.

    :raises ValueError: If lists of datasets are given and the train and test
        lists differ in length.

    :return: A properly initialized :class:`NIScenario` instance.
    """

    if isinstance(train_dataset, (list, tuple)):
        # Multi-dataset setting: merge everything into one train/test pair.
        if len(train_dataset) != len(test_dataset):
            raise ValueError('Train/test dataset lists must contain the '
                             'exact same number of datasets')

        merged_train, merged_test, _ = concat_datasets_sequentially(
            train_dataset, test_dataset)
    else:
        merged_train, merged_test = train_dataset, test_dataset

    groups = {
        'train': (train_transform, None),
        'eval': (eval_transform, None),
    }

    def _as_classification_dataset(data, initial_group):
        # Datasets should be instances of AvalancheDataset
        return AvalancheDataset(
            data,
            transform_groups=groups,
            initial_transform_group=initial_group,
            dataset_type=AvalancheDatasetType.CLASSIFICATION)

    wrapped_train = _as_classification_dataset(merged_train, 'train')
    wrapped_test = _as_classification_dataset(merged_test, 'eval')

    return NIScenario(
        wrapped_train,
        wrapped_test,
        n_experiences,
        task_labels,
        shuffle=shuffle,
        seed=seed,
        balance_experiences=balance_experiences,
        min_class_patterns_in_exp=min_class_patterns_in_exp,
        fixed_exp_assignment=fixed_exp_assignment,
        reproducibility_data=reproducibility_data)
Beispiel #4
0
    def __init__(
        self,
        env: PassiveEnvironment,
        setting: IncrementalSLSetting,
        x: Optional[Tensor] = None,
        y: Optional[Tensor] = None,
        task_labels: Optional[Tensor] = None,
    ):
        """Wrap ``env`` and expose its content as an ``AvalancheDataset``.

        The split ("Train", "Valid" or "Test") is determined by checking
        which of the setting's environments ``env`` is, and selects the
        matching transforms. If any of ``x``, ``y`` or ``task_labels`` is
        missing, all three are rebuilt by iterating over this object once
        and concatenating every batch in memory.

        :param env: The environment to wrap. Must be one of
            ``setting.train_env``, ``setting.val_env`` or
            ``setting.test_env``.
        :param setting: The setting the environment belongs to. Its
            ``current_task_id`` is used as the task id when it is an
            ``IncrementalSLSetting``, otherwise the task id is 0.
        :param x: Optional pre-computed inputs.
        :param y: Optional pre-computed targets.
        :param task_labels: Optional pre-computed task labels.
        """
        super().__init__(env=env)
        self.setting = setting
        self.type: str
        if isinstance(setting, IncrementalSLSetting):
            self.task_id = setting.current_task_id
        else:
            # No known task, or we don't have access to the task ID, so just consider
            # this to come from the first task.
            self.task_id = 0

        if env is setting.train_env:
            self.type = "Train"
            self.transforms = setting.train_transforms
        elif env is setting.val_env:
            self.type = "Valid"
            self.transforms = setting.val_transforms
        else:
            self.type = "Test"
            assert env is setting.test_env
            self.transforms = setting.test_transforms
        self.name = f"{self.type}_{self.task_id}"

        if x is None or y is None or task_labels is None:
            # At least one tensor is missing: discard whatever was passed and
            # rebuild all three from the environment.
            all_observations: List[Observations] = []
            all_rewards: List[Rewards] = []

            for batch in tqdm.tqdm(
                    self, desc="Converting environment into TensorDataset"):
                observations: Observations
                rewards: Optional[Rewards]
                if isinstance(batch, Observations):
                    observations = batch
                    rewards = None
                else:
                    assert isinstance(batch, tuple) and len(batch) == 2
                    observations, rewards = batch

                if rewards is None:
                    # Need to send actions to the env before we can actually get the
                    # associated Reward.
                    # Here we sample a random action (no other choice really..) and so we
                    # are going to get bad results in case the online performance is being
                    # evaluated.
                    action = self.env.action_space.sample()
                    if observations.batch_size != action.shape[0]:
                        # Trim the sampled action to the size of this batch
                        # (presumably a shorter final batch).
                        action = action[:observations.batch_size]

                    rewards = self.env.send(action)

                all_observations.append(observations)
                all_rewards.append(rewards)
            # TODO: This will be absolutely unfeasible for larger datasets like ImageNet.
            stacked_observations: Observations = Observations.concatenate(
                all_observations)
            x = stacked_observations.x
            task_labels = stacked_observations.task_labels
            assert all(
                label is not None
                for batch_rewards in all_rewards
                for label in batch_rewards
            ), "Need fully labeled train dataset for now."
            stacked_rewards: Rewards = Rewards.concatenate(all_rewards)
            y = stacked_rewards.y

        # `task_labels` may still be None here if the concatenated observations
        # carry no task labels; guard before iterating to avoid a TypeError.
        if task_labels is None or all(t is None for t in task_labels):
            # The task labels are None, even at training time, which indicates this
            # is probably a `ContinualSLSetting`
            task_labels = None
        elif isinstance(task_labels, Tensor):
            task_labels = task_labels.cpu().numpy().tolist()

        dataset = TensorDataset(x, y)
        self._tensor_dataset = dataset
        self._dataset = AvalancheDataset(
            dataset=dataset,
            task_labels=task_labels,
            targets=y.tolist(),
            dataset_type=AvalancheDatasetType.CLASSIFICATION,
        )