def test_classes_in_this_experience(self):
        train_exps = []

        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 70, (200, ))
        tensor_t = torch.randint(0, 5, (200, ))
        train_exps.append(
            AvalancheTensorDataset(tensor_x, tensor_y, task_labels=tensor_t))

        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 100, (200, ))
        tensor_t = torch.randint(0, 5, (200, ))
        train_exps.append(
            AvalancheTensorDataset(tensor_x, tensor_y, task_labels=tensor_t))

        test_exps = []
        test_x = torch.rand(200, 3, 28, 28)
        test_y = torch.randint(100, 200, (200, ))
        test_t = torch.randint(0, 5, (200, ))
        test_exps.append(
            AvalancheTensorDataset(test_x, test_y, task_labels=test_t))

        other_stream_exps = []
        other_x = torch.rand(200, 3, 28, 28)
        other_y = torch.randint(400, 600, (200, ))
        other_t = torch.randint(0, 5, (200, ))
        other_stream_exps.append(
            AvalancheTensorDataset(other_x, other_y, task_labels=other_t))

        benchmark_instance = dataset_benchmark(
            train_datasets=train_exps,
            test_datasets=test_exps,
            other_streams_datasets={'other': other_stream_exps})

        train_exp_0: GenericExperience = benchmark_instance.train_stream[0]
        train_exp_1: GenericExperience = benchmark_instance.train_stream[1]
        train_0_classes = train_exp_0.classes_in_this_experience
        train_1_classes = train_exp_1.classes_in_this_experience
        train_0_classes_min = min(train_0_classes)
        train_1_classes_min = min(train_1_classes)
        train_0_classes_max = max(train_0_classes)
        train_1_classes_max = max(train_1_classes)
        self.assertGreaterEqual(train_0_classes_min, 0)
        self.assertLess(train_0_classes_max, 70)
        self.assertGreaterEqual(train_1_classes_min, 0)
        self.assertLess(train_1_classes_max, 100)

        test_exp_0: GenericExperience = benchmark_instance.test_stream[0]
        test_0_classes = test_exp_0.classes_in_this_experience
        test_0_classes_min = min(test_0_classes)
        test_0_classes_max = max(test_0_classes)
        self.assertGreaterEqual(test_0_classes_min, 100)
        self.assertLess(test_0_classes_max, 200)

        other_exp_0: GenericExperience = benchmark_instance.other_stream[0]
        other_0_classes = other_exp_0.classes_in_this_experience
        other_0_classes_min = min(other_0_classes)
        other_0_classes_max = max(other_0_classes)
        self.assertGreaterEqual(other_0_classes_min, 400)
        self.assertLess(other_0_classes_max, 600)
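Aside: `classes_in_this_experience` is, in effect, the set of distinct targets of that experience's dataset, which is why the assertions above can only bound the class ids (the targets are sampled randomly). A minimal sketch of the equivalent check with plain torch (variable names are illustrative):

import torch

tensor_y = torch.randint(0, 70, (200,))
unique_classes = torch.unique(tensor_y).tolist()
# Every observed class id must fall in the sampling range [0, 70).
assert min(unique_classes) >= 0 and max(unique_classes) < 70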
    def _make_tensor_datasets(self):
        train_exps = []

        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 70, (200,))
        tensor_t = torch.randint(0, 5, (200,))
        train_exps.append(AvalancheTensorDataset(
            tensor_x, tensor_y, task_labels=tensor_t,
            dataset_type=AvalancheDatasetType.CLASSIFICATION))

        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 100, (200,))
        tensor_t = torch.randint(0, 5, (200,))
        train_exps.append(AvalancheTensorDataset(
            tensor_x, tensor_y, task_labels=tensor_t,
            dataset_type=AvalancheDatasetType.CLASSIFICATION))

        test_exps = []
        test_x = torch.rand(200, 3, 28, 28)
        test_y = torch.randint(100, 200, (200,))
        test_t = torch.randint(0, 5, (200,))
        test_exps.append(AvalancheTensorDataset(
            test_x, test_y, task_labels=test_t,
            dataset_type=AvalancheDatasetType.CLASSIFICATION))

        other_stream_exps = []
        other_x = torch.rand(200, 3, 28, 28)
        other_y = torch.randint(400, 600, (200,))
        other_t = torch.randint(0, 5, (200,))
        other_stream_exps.append(AvalancheTensorDataset(
            other_x, other_y, task_labels=other_t,
            dataset_type=AvalancheDatasetType.CLASSIFICATION))

        return train_exps, test_exps, other_stream_exps
Example #3
    def test(self):
        # Given
        model = AbsModel()
        herding = HerdingSelectionStrategy(model, "features")
        closest_to_center = ClosestToCenterSelectionStrategy(model, "features")

        # When
        # Features are [[0], [4], [5]]
        # Center is [3]
        dataset = AvalancheTensorDataset(
            tensor([0, -4, 5]).float(), zeros(3),
            dataset_type=AvalancheDatasetType.CLASSIFICATION
        )
        strategy = MagicMock(device="cpu", eval_mb_size=8)

        # Then

        # Herding:

        # 1. At the first pass, we select -4 (at index 1)
        #  because its feature ([4]) is the closest to the center
        # 2. At the second pass, we select 0 (at index 0)
        #  because the resulting center would be [2], closer to [3] than the
        #  center obtained if we were to select 5 ([4.5])
        # 3. Finally, we select the last remaining exemplar
        self.assertSequenceEqual([1, 0, 2],
                                 herding.make_sorted_indices(strategy, dataset))
        # Closest to center

        # -4 (index 1) is the closest to the center in feature space.
        # Then 5 (index 2) is closer to the center than 0 (index 0)
        self.assertSequenceEqual([1, 2, 0],
                                 closest_to_center.make_sorted_indices(strategy,
                                                                       dataset))
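The two orderings asserted above can be reproduced with the greedy arithmetic described in the comments. A minimal sketch, assuming (as the comments do) that the model maps each input x to the single feature |x|:

# Features for inputs [0, -4, 5] under an absolute-value model.
features = [0.0, 4.0, 5.0]
center = sum(features) / len(features)  # 3.0

# Herding: greedily pick the exemplar that keeps the mean of the
# selected features closest to the center.
selected, remaining = [], [0, 1, 2]
while remaining:
    best = min(remaining, key=lambda i: abs(
        (sum(features[j] for j in selected) + features[i])
        / (len(selected) + 1) - center))
    selected.append(best)
    remaining.remove(best)
assert selected == [1, 0, 2]

# Closest-to-center: simply sort by distance to the center.
assert sorted([0, 1, 2], key=lambda i: abs(features[i] - center)) == [1, 2, 0]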
Example #4
    def after_train_dataset_adaptation(self, strategy: 'BaseStrategy',
                                       **kwargs):
        if strategy.training_exp_counter != 0:
            memory = AvalancheTensorDataset(
                torch.cat(self.x_memory).cpu(),
                list(itertools.chain.from_iterable(self.y_memory)),
                transform=self.buffer_transform, target_transform=None)

            strategy.adapted_dataset = \
                AvalancheConcatDataset((strategy.adapted_dataset, memory))
Example #5
    def test_complete_test_set_only(self):
        train_exps = []
        test_exps = []

        for _ in range(5):
            tensor_x = torch.rand(200, 3, 28, 28)
            tensor_y = torch.randint(0, 100, (200, ))
            tensor_t = torch.randint(0, 5, (200, ))
            train_exps.append(
                AvalancheTensorDataset(tensor_x,
                                       tensor_y,
                                       task_labels=tensor_t))

        for _ in range(3):
            tensor_x = torch.rand(150, 3, 28, 28)
            tensor_y = torch.randint(0, 100, (150, ))
            tensor_t = torch.randint(0, 5, (150, ))
            test_exps.append(
                AvalancheTensorDataset(tensor_x,
                                       tensor_y,
                                       task_labels=tensor_t))

        with self.assertRaises(Exception):
            benchmark_instance = GenericCLScenario(
                stream_definitions={
                    "train": (train_exps, ),
                    "test": (test_exps, ),
                },
                complete_test_set_only=True,
            )

        benchmark_instance = GenericCLScenario(
            stream_definitions={
                "train": (train_exps, ),
                "test": (test_exps[0], ),
            },
            complete_test_set_only=True,
        )

        self.assertEqual(5, len(benchmark_instance.train_stream))
        self.assertEqual(1, len(benchmark_instance.test_stream))
Example #6
    def observe_exemplars(self, class2exemplars: Dict[int, List[int]],
                          selection_order: List[int]):
        self.policy.selection_strategy = FixedSelectionStrategy(selection_order)
        x = tensor(
            [i for exemplars in class2exemplars.values() for i in exemplars])
        y = tensor(
            [class_id for class_id, exemplars in class2exemplars.items() for _
             in exemplars]).long()
        dataset = AvalancheTensorDataset(
            x, y, dataset_type=AvalancheDatasetType.CLASSIFICATION)

        self.policy(MagicMock(experience=MagicMock(dataset=dataset)))
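The two comprehensions above flatten the class-to-exemplars mapping into parallel x/y lists, one entry per exemplar. A quick illustration with a hypothetical mapping:

class2exemplars = {0: [10, 11], 1: [20]}
x = [i for exemplars in class2exemplars.values() for i in exemplars]
y = [c for c, exemplars in class2exemplars.items() for _ in exemplars]
assert x == [10, 11, 20]
assert y == [0, 0, 1]  # the class id is repeated once per exemplar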
Example #7
    def after_train_dataset_adaptation(self, strategy: "SupervisedTemplate",
                                       **kwargs):
        if strategy.clock.train_exp_counter != 0:
            memory = AvalancheTensorDataset(
                torch.cat(self.x_memory).cpu(),
                list(itertools.chain.from_iterable(self.y_memory)),
                transform=self.buffer_transform,
                target_transform=None,
            )

            strategy.adapted_dataset = AvalancheConcatDataset(
                (strategy.adapted_dataset, memory))
Example #8
    def test_tensor_samples(self):
        p_metric = ImagesSamplePlugin(
            n_cols=5, n_rows=5, group=True, mode="train"
        )

        scenario = SplitMNIST(5)
        curr_exp = scenario.train_stream[0]
        for mb in DataLoader(curr_exp.dataset, batch_size=32):
            break
        curr_dataset = AvalancheTensorDataset(*mb[:2], targets=mb[1])

        strategy_mock = MagicMock(
            eval_mb_size=32, experience=curr_exp, adapted_dataset=curr_dataset
        )

        mval = p_metric.after_train_dataset_adaptation(strategy_mock)
        img_grid = mval[0].value.image
Example #9
def create_generic_scenario_from_tensor_lists(
        train_tensors: Sequence[Sequence[Any]],
        test_tensors: Sequence[Sequence[Any]],
        task_labels: Sequence[int],
        *,
        complete_test_set_only: bool = False,
        train_transform=None,
        train_target_transform=None,
        eval_transform=None,
        eval_target_transform=None,
        dataset_type: AvalancheDatasetType = None) -> GenericCLScenario:
    """
    This helper function is DEPRECATED in favor of
    `create_generic_benchmark_from_tensor_lists`.

    Creates a generic scenario given lists of Tensors. A separate dataset will
    be created from each Tensor tuple (x, y, z, ...) and each of those training
    datasets will be considered a separate training experience. Using this
    helper function is the lowest-level way to create a Continual Learning
    scenario. When possible, consider using higher level helpers.

    Experiences are defined by passing lists of tensors as the `train_tensors`
    and `test_tensors` parameter. Those parameters must be lists containing
    sub-lists of tensors, one for each experience. Each tensor defines the value
    of a feature ("x", "y", "z", ...) for all patterns of that experience.

    By default the second tensor of each experience will be used to fill the
    `targets` value (label of each pattern).

    In its base form, the test lists must contain the same number of elements
    as the training lists. Those pairs of datasets are then used to create the
    "past", "cumulative" (a.k.a. growing) and "future" test sets.
    However, in certain Continual Learning scenarios only the concept of a
    "complete" test set makes sense. In that case, the
    ``complete_test_set_only`` parameter should be set to True (see the
    parameter description for more info).

    :param train_tensors: A list of lists. The first list must contain the
        tensors for the first training experience (one tensor per feature), the
        second list must contain the tensors for the second training experience,
        and so on.
    :param test_tensors: A list of lists. The first list must contain the
        tensors for the first test experience (one tensor per feature), the
        second list must contain the tensors for the second test experience,
        and so on. When using `complete_test_set_only`, this parameter
        must be a list containing a single sub-list for the single test
        experience.
    :param task_labels: A list of task labels. Must contain a task label for
        each experience. For Single-Incremental-Task (a.k.a. Task-Free)
        scenarios, this is usually a list of zeros. For Multi-Task scenarios,
        this is usually a list of ascending task labels (starting from 0).
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the scenario. This means that ``test_tensors`` must
        define a single experience. Defaults to False, which means that
        ``train_tensors`` and ``test_tensors`` must define the same
        number of experiences.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param dataset_type: The type of the dataset. Defaults to None, which
        means that the type will be obtained from the input datasets. If input
        datasets are not instances of :class:`AvalancheDataset`, the type
        UNDEFINED will be used.

    :returns: A :class:`GenericCLScenario` instance.
    """

    warnings.warn(
        "create_generic_scenario_from_tensor_lists is deprecated in "
        "favor of create_generic_benchmark_from_tensor_lists.",
        DeprecationWarning,
    )

    train_datasets = [
        AvalancheTensorDataset(*exp_tensors, dataset_type=dataset_type)
        for exp_tensors in train_tensors
    ]

    test_datasets = [
        AvalancheTensorDataset(*exp_tensors, dataset_type=dataset_type)
        for exp_tensors in test_tensors
    ]

    return create_multi_dataset_generic_scenario(
        train_datasets,
        test_datasets,
        task_labels,
        train_transform=train_transform,
        train_target_transform=train_target_transform,
        eval_transform=eval_transform,
        eval_target_transform=eval_target_transform,
        complete_test_set_only=complete_test_set_only,
        dataset_type=dataset_type,
    )
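For reference, a minimal usage sketch of the (deprecated) helper above; shapes and label ranges are illustrative. Each experience is one sub-list holding an x tensor and a y tensor, and `task_labels` provides one label per experience:

import torch

train_tensors = [
    [torch.rand(50, 3, 28, 28), torch.randint(0, 10, (50,))],  # experience 0
    [torch.rand(50, 3, 28, 28), torch.randint(0, 10, (50,))],  # experience 1
]
test_tensors = [
    [torch.rand(20, 3, 28, 28), torch.randint(0, 10, (20,))],
    [torch.rand(20, 3, 28, 28), torch.randint(0, 10, (20,))],
]
scenario = create_generic_scenario_from_tensor_lists(
    train_tensors, test_tensors, task_labels=[0, 1])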
Example #10
def main(args):
    # --- CONFIG
    device = torch.device(
        f"cuda:{args.cuda}"
        if torch.cuda.is_available() and args.cuda >= 0
        else "cpu"
    )
    # ---------

    tr_ds = [
        AvalancheTensorDataset(
            torch.randn(10, 3),
            torch.randint(0, 3, (10,)).tolist(),
            task_labels=torch.randint(0, 5, (10,)).tolist(),
        )
        for _ in range(3)
    ]
    ts_ds = [
        AvalancheTensorDataset(
            torch.randn(10, 3),
            torch.randint(0, 3, (10,)).tolist(),
            task_labels=torch.randint(0, 5, (10,)).tolist(),
        )
        for _ in range(3)
    ]
    scenario = create_multi_dataset_generic_benchmark(
        train_datasets=tr_ds, test_datasets=ts_ds
    )
    # ---------

    # MODEL CREATION
    model = SimpleMLP(num_classes=3, input_size=3)

    # DEFINE THE EVALUATION PLUGIN AND LOGGER
    # The evaluation plugin manages the metrics computation.
    # It takes as argument a list of metrics and a list of loggers.
    # The evaluation plugin calls the loggers to serialize the metrics
    # and save them in persistent memory or print them in the standard output.

    # log to text file
    text_logger = TextLogger(open("log.txt", "a"))

    # print to stdout
    interactive_logger = InteractiveLogger()

    csv_logger = CSVLogger()

    eval_plugin = EvaluationPlugin(
        accuracy_metrics(
            minibatch=True,
            epoch=True,
            epoch_running=True,
            experience=True,
            stream=True,
        ),
        loss_metrics(
            minibatch=True,
            epoch=True,
            epoch_running=True,
            experience=True,
            stream=True,
        ),
        forgetting_metrics(experience=True, stream=True),
        bwt_metrics(experience=True, stream=True),
        cpu_usage_metrics(
            minibatch=True,
            epoch=True,
            epoch_running=True,
            experience=True,
            stream=True,
        ),
        timing_metrics(
            minibatch=True,
            epoch=True,
            epoch_running=True,
            experience=True,
            stream=True,
        ),
        ram_usage_metrics(
            every=0.5, minibatch=True, epoch=True, experience=True, stream=True
        ),
        gpu_usage_metrics(
            args.cuda,
            every=0.5,
            minibatch=True,
            epoch=True,
            experience=True,
            stream=True,
        ),
        disk_usage_metrics(
            minibatch=True, epoch=True, experience=True, stream=True
        ),
        MAC_metrics(minibatch=True, epoch=True, experience=True),
        loggers=[interactive_logger, text_logger, csv_logger],
        collect_all=True,
    )  # collect all metrics (set to True by default)

    # CREATE THE STRATEGY INSTANCE (NAIVE)
    cl_strategy = Naive(
        model,
        SGD(model.parameters(), lr=0.001, momentum=0.9),
        CrossEntropyLoss(),
        train_mb_size=500,
        train_epochs=1,
        eval_mb_size=100,
        device=device,
        evaluator=eval_plugin,
        eval_every=1,
    )

    # TRAINING LOOP
    print("Starting experiment...")
    results = []
    for i, experience in enumerate(scenario.train_stream):
        print("Start of experience: ", experience.current_experience)
        print("Current Classes: ", experience.classes_in_this_experience)

        # train returns a dictionary containing last recorded value
        # for each metric.
        res = cl_strategy.train(experience, eval_streams=[scenario.test_stream])
        print("Training completed")

        print("Computing accuracy on the whole test set")
        # test returns a dictionary with the last metric collected during
        # evaluation on that stream
        results.append(cl_strategy.eval(scenario.test_stream))

    print(f"Test metrics:\n{results}")

    # Dict with all the metric curves,
    # only available when `collect_all` is True.
    # Each entry is a (x, metric value) tuple.
    # You can use this dictionary to manipulate the
    # metrics without avalanche.
    all_metrics = cl_strategy.evaluator.get_all_metrics()
    print(f"Stored metrics: {list(all_metrics.keys())}")
Example #11
def create_generic_benchmark_from_tensor_lists(
    train_tensors: Sequence[Sequence[Any]],
    test_tensors: Sequence[Sequence[Any]],
    *,
    other_streams_tensors: Dict[str, Sequence[Sequence[Any]]] = None,
    task_labels: Sequence[int],
    complete_test_set_only: bool = False,
    train_transform=None,
    train_target_transform=None,
    eval_transform=None,
    eval_target_transform=None,
    other_streams_transforms: Dict[str, Tuple[Any, Any]] = None,
    dataset_type: AvalancheDatasetType = None
) -> GenericCLScenario:
    """
    Creates a benchmark instance given lists of Tensors. A separate dataset will
    be created from each Tensor tuple (x, y, z, ...) and each of those training
    datasets will be considered a separate training experience. Using this
    helper function is the lowest-level way to create a Continual Learning
    benchmark. When possible, consider using higher level helpers.

    Experiences are defined by passing lists of tensors as the `train_tensors`,
    `test_tensors` (and `other_streams_tensors`) parameters. Those parameters
    must be lists containing lists of tensors, one list for each experience.
    Each tensor defines the value of a feature ("x", "y", "z", ...) for all
    patterns of that experience.

    By default the second tensor of each experience will be used to fill the
    `targets` value (label of each pattern).

    Beware that task labels can only be defined by choosing a single task label
    for each experience (the same task label is applied to all patterns of
    experiences sharing the same position in different streams).

    If you need to create a benchmark instance in which task labels are defined
    in a more fine-grained way, consider using
    :func:`create_multi_dataset_generic_benchmark` by passing properly
    initialized :class:`AvalancheDataset` instances.

    :param train_tensors: A list of lists. The first list must contain the
        tensors for the first training experience (one tensor per feature), the
        second list must contain the tensors for the second training experience,
        and so on.
    :param test_tensors: A list of lists. The first list must contain the
        tensors for the first test experience (one tensor per feature), the
        second list must contain the tensors for the second test experience,
        and so on. When using `complete_test_set_only`, this parameter
        must be a list containing a single sub-list for the single test
        experience.
    :param other_streams_tensors: A dictionary describing the content of
        custom streams. Keys must be valid stream names (letters and numbers,
        not starting with a number) while the values follow the same structure
        as the `train_tensors` and `test_tensors` parameters. If this
        dictionary contains a definition for the "train" or "test" streams,
        those definitions will override the `train_tensors` and `test_tensors`
        parameters.
    :param task_labels: A list of task labels. Must contain one value for
        each experience. Each value describes the task label that will be
        applied to all patterns of a certain experience. For more info on that,
        see the function description.
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the benchmark. This means that ``test_tensors`` must
        define a single experience. Defaults to False.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param other_streams_transforms: Transformations to apply to custom
        streams. If no transformations are defined for a custom stream,
        then "train" transformations will be used. This parameter must be a
        dictionary mapping stream names to transformations. Each transformation
        must be a two-element tuple where the first element defines the
        X transformation while the second element is the Y transformation.
        Those elements can be None. If this dictionary contains the
        transformations for the "train" or "test" streams, those
        transformations will override the `train_transform`,
        `train_target_transform`, `eval_transform` and `eval_target_transform`
        parameters.
    :param dataset_type: The type of the dataset. Defaults to UNDEFINED.

    :returns: A :class:`GenericCLScenario` instance.
    """

    input_streams = dict(train=train_tensors, test=test_tensors)

    if other_streams_tensors is not None:
        input_streams = {**input_streams, **other_streams_tensors}

    stream_definitions = dict()

    for stream_name, list_of_exps_tensors in input_streams.items():
        stream_datasets = []
        for exp_id, exp_tensors in enumerate(list_of_exps_tensors):
            stream_datasets.append(
                AvalancheTensorDataset(
                    *exp_tensors,
                    dataset_type=dataset_type,
                    task_labels=task_labels[exp_id]
                )
            )

        stream_definitions[stream_name] = stream_datasets

    return create_multi_dataset_generic_benchmark(
        [],
        [],
        other_streams_datasets=stream_definitions,
        train_transform=train_transform,
        train_target_transform=train_target_transform,
        eval_transform=eval_transform,
        eval_target_transform=eval_target_transform,
        complete_test_set_only=complete_test_set_only,
        other_streams_transforms=other_streams_transforms,
        dataset_type=dataset_type,
    )
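A minimal usage sketch of the helper above (shapes illustrative). Note that `task_labels` is keyword-only here and assigns a single task label per experience position, shared across streams:

import torch

train_tensors = [
    [torch.rand(50, 3, 28, 28), torch.randint(0, 10, (50,))],
    [torch.rand(50, 3, 28, 28), torch.randint(0, 10, (50,))],
]
test_tensors = [
    [torch.rand(20, 3, 28, 28), torch.randint(0, 10, (20,))],
    [torch.rand(20, 3, 28, 28), torch.randint(0, 10, (20,))],
]
benchmark = create_generic_benchmark_from_tensor_lists(
    train_tensors, test_tensors, task_labels=[0, 1],
    dataset_type=AvalancheDatasetType.CLASSIFICATION)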
Example #12
    def test_lazy_benchmark_with_validation_stream_fixed_size(self):
        lazy_options = [None, True, False]
        for lazy_option in lazy_options:
            with self.subTest(lazy_option=lazy_option):
                pattern_shape = (3, 32, 32)

                # Definition of training experiences
                # Experience 1
                experience_1_x = torch.zeros(100, *pattern_shape)
                experience_1_y = torch.zeros(100, dtype=torch.long)
                experience_1_dataset = AvalancheTensorDataset(
                    experience_1_x, experience_1_y)

                # Experience 2
                experience_2_x = torch.zeros(80, *pattern_shape)
                experience_2_y = torch.ones(80, dtype=torch.long)
                experience_2_dataset = AvalancheTensorDataset(
                    experience_2_x, experience_2_y)

                # Test experience
                test_x = torch.zeros(50, *pattern_shape)
                test_y = torch.zeros(50, dtype=torch.long)
                experience_test = AvalancheTensorDataset(test_x, test_y)

                def train_gen():
                    # Lazy generator of the training stream
                    for dataset in [
                            experience_1_dataset, experience_2_dataset
                    ]:
                        yield dataset

                def test_gen():
                    # Lazy generator of the test stream
                    for dataset in [experience_test]:
                        yield dataset

                initial_benchmark_instance = create_lazy_generic_benchmark(
                    train_generator=LazyStreamDefinition(
                        train_gen(), 2, [0, 0]),
                    test_generator=LazyStreamDefinition(test_gen(), 1, [0]),
                    complete_test_set_only=True,
                    dataset_type=AvalancheDatasetType.CLASSIFICATION,
                )

                valid_benchmark = benchmark_with_validation_stream(
                    initial_benchmark_instance,
                    20,
                    shuffle=False,
                    lazy_splitting=lazy_option,
                )

                if lazy_option is None or lazy_option:
                    expect_laziness = True
                else:
                    expect_laziness = False

                self.assertEqual(
                    expect_laziness,
                    valid_benchmark.stream_definitions["train"].is_lazy,
                )

                self.assertEqual(2, len(valid_benchmark.train_stream))
                self.assertEqual(2, len(valid_benchmark.valid_stream))
                self.assertEqual(1, len(valid_benchmark.test_stream))
                self.assertTrue(valid_benchmark.complete_test_set_only)

                maybe_exp = valid_benchmark.stream_definitions[
                    "train"].exps_data.get_experience_if_loaded(0)
                self.assertEqual(expect_laziness, maybe_exp is None)

                self.assertEqual(80,
                                 len(valid_benchmark.train_stream[0].dataset))

                maybe_exp = valid_benchmark.stream_definitions[
                    "train"].exps_data.get_experience_if_loaded(1)
                self.assertEqual(expect_laziness, maybe_exp is None)

                self.assertEqual(60,
                                 len(valid_benchmark.train_stream[1].dataset))

                maybe_exp = valid_benchmark.stream_definitions[
                    "valid"].exps_data.get_experience_if_loaded(0)
                self.assertEqual(expect_laziness, maybe_exp is None)

                self.assertEqual(20,
                                 len(valid_benchmark.valid_stream[0].dataset))

                maybe_exp = valid_benchmark.stream_definitions[
                    "valid"].exps_data.get_experience_if_loaded(1)
                self.assertEqual(expect_laziness, maybe_exp is None)

                self.assertEqual(20,
                                 len(valid_benchmark.valid_stream[1].dataset))

                self.assertIsNotNone(
                    valid_benchmark.stream_definitions["train"].exps_data.
                    get_experience_if_loaded(0))
                self.assertIsNotNone(
                    valid_benchmark.stream_definitions["valid"].exps_data.
                    get_experience_if_loaded(0))
                self.assertIsNotNone(
                    valid_benchmark.stream_definitions["train"].exps_data.
                    get_experience_if_loaded(1))
                self.assertIsNotNone(
                    valid_benchmark.stream_definitions["valid"].exps_data.
                    get_experience_if_loaded(1))

                self.assertTrue(
                    torch.equal(
                        experience_1_x[:80],
                        valid_benchmark.train_stream[0].dataset[:][0],
                    ))

                self.assertTrue(
                    torch.equal(
                        experience_2_x[:60],
                        valid_benchmark.train_stream[1].dataset[:][0],
                    ))

                self.assertTrue(
                    torch.equal(
                        experience_1_y[:80],
                        valid_benchmark.train_stream[0].dataset[:][1],
                    ))

                self.assertTrue(
                    torch.equal(
                        experience_2_y[:60],
                        valid_benchmark.train_stream[1].dataset[:][1],
                    ))

                self.assertTrue(
                    torch.equal(
                        experience_1_x[80:],
                        valid_benchmark.valid_stream[0].dataset[:][0],
                    ))

                self.assertTrue(
                    torch.equal(
                        experience_2_x[60:],
                        valid_benchmark.valid_stream[1].dataset[:][0],
                    ))

                self.assertTrue(
                    torch.equal(
                        experience_1_y[80:],
                        valid_benchmark.valid_stream[0].dataset[:][1],
                    ))

                self.assertTrue(
                    torch.equal(
                        experience_2_y[60:],
                        valid_benchmark.valid_stream[1].dataset[:][1],
                    ))

                self.assertTrue(
                    torch.equal(test_x,
                                valid_benchmark.test_stream[0].dataset[:][0]))

                self.assertTrue(
                    torch.equal(test_y,
                                valid_benchmark.test_stream[0].dataset[:][1]))
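The sizes asserted above follow directly from the fixed-size split: with shuffle=False, each training experience donates its last 20 patterns to the validation stream. A quick check of the arithmetic:

validation_size = 20
for original_size, expected_train in [(100, 80), (80, 60)]:
    assert original_size - validation_size == expected_train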
Example #13
    def test_avalanche_concat_dataset_collate_fn_inheritance(self):
        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 100, (200, ))
        tensor_z = torch.randint(0, 100, (200, ))

        tensor_x2 = torch.rand(200, 3, 28, 28)
        tensor_y2 = torch.randint(0, 100, (200, ))
        tensor_z2 = torch.randint(0, 100, (200, ))

        def my_collate_fn(patterns):
            x_values = torch.stack([pat[0] for pat in patterns], 0)
            y_values = torch.tensor([pat[1] for pat in patterns]) + 1
            z_values = torch.tensor([-1 for _ in patterns])
            t_values = torch.tensor([pat[3] for pat in patterns])
            return x_values, y_values, z_values, t_values

        def my_collate_fn2(patterns):
            x_values = torch.stack([pat[0] for pat in patterns], 0)
            y_values = torch.tensor([pat[1] for pat in patterns]) + 2
            z_values = torch.tensor([-2 for _ in patterns])
            t_values = torch.tensor([pat[3] for pat in patterns])
            return x_values, y_values, z_values, t_values

        dataset1 = TensorDataset(tensor_x, tensor_y, tensor_z)
        dataset2 = AvalancheTensorDataset(tensor_x2,
                                          tensor_y2,
                                          tensor_z2,
                                          collate_fn=my_collate_fn)
        concat = AvalancheConcatDataset([dataset1, dataset2],
                                        collate_fn=my_collate_fn2)  # Ok

        x, y, z, t = dataset2[0:5]
        self.assertIsInstance(x, Tensor)
        self.assertTrue(torch.equal(tensor_x2[0:5], x))
        self.assertTrue(torch.equal(tensor_y2[0:5] + 1, y))
        self.assertTrue(torch.equal(torch.full((5, ), -1, dtype=torch.long),
                                    z))
        self.assertTrue(torch.equal(torch.zeros(5, dtype=torch.long), t))

        x2, y2, z2, t2 = concat[0:5]
        self.assertIsInstance(x2, Tensor)
        self.assertTrue(torch.equal(tensor_x[0:5], x2))
        self.assertTrue(torch.equal(tensor_y[0:5] + 2, y2))
        self.assertTrue(
            torch.equal(torch.full((5, ), -2, dtype=torch.long), z2))
        self.assertTrue(torch.equal(torch.zeros(5, dtype=torch.long), t2))

        dataset1_classification = AvalancheTensorDataset(
            tensor_x,
            tensor_y,
            tensor_z,
            dataset_type=AvalancheDatasetType.CLASSIFICATION)

        dataset2_segmentation = AvalancheDataset(
            dataset2, dataset_type=AvalancheDatasetType.SEGMENTATION)

        with self.assertRaises(ValueError):
            bad_concat_types = dataset1_classification + dataset2_segmentation

        with self.assertRaises(ValueError):
            bad_concat_collate = AvalancheConcatDataset(
                [dataset1, dataset2_segmentation], collate_fn=my_collate_fn)

        ok_concat_classification = dataset1_classification + dataset2
        self.assertEqual(AvalancheDatasetType.CLASSIFICATION,
                         ok_concat_classification.dataset_type)

        ok_concat_classification2 = dataset2 + dataset1_classification
        self.assertEqual(AvalancheDatasetType.CLASSIFICATION,
                         ok_concat_classification2.dataset_type)
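Note that slicing an AvalancheDataset applies its collate function, which is why `dataset2[0:5]` above comes back already batched (with the +1 label shift). Presumably the same collate functions can also be handed to a plain PyTorch DataLoader; a hedged sketch, reusing `concat` and `my_collate_fn2` from the test above:

from torch.utils.data import DataLoader

# my_collate_fn2 stacks patterns and shifts labels by +2, as defined above.
loader = DataLoader(concat, batch_size=5, collate_fn=my_collate_fn2)
x_batch, y_batch, z_batch, t_batch = next(iter(loader))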
Example #14
def create_generic_scenario_from_tensors(
        train_data_x: Sequence[Any],
        train_data_y: Sequence[Sequence[SupportsInt]],
        test_data_x: Union[Any, Sequence[Any]],
        test_data_y: Union[Any, Sequence[Sequence[SupportsInt]]],
        task_labels: Sequence[int],
        complete_test_set_only: bool = False,
        train_transform=None,
        train_target_transform=None,
        test_transform=None,
        test_target_transform=None) -> GenericCLScenario:
    """
    Creates a generic scenario given lists of Tensors and the respective task
    labels. A separate dataset will be created from each Tensor pair (x + y)
    and each of those training datasets will be considered a separate
    training experience. Contents of the datasets will not be changed, including
    the targets. Using this helper function is the lowest-level way to create a
    Continual Learning scenario. When possible, consider using higher level
    helpers.

    In its base form, the test lists must contain the same number of elements
    as the training lists. Those pairs of datasets are then used to create the
    "past", "cumulative" (a.k.a. growing) and "future" test sets.
    However, in certain Continual Learning scenarios only the concept of a
    "complete" test set makes sense. In that case, the
    ``complete_test_set_only`` parameter should be set to True (see the
    parameter description for more info).

    :param train_data_x: A list of Tensors (one per experience) containing the
        patterns of the training sets.
    :param train_data_y: A list of Tensors or int lists containing the
        labels of the patterns of the training sets. Must contain the same
        number of elements as ``train_data_x``.
    :param test_data_x: A Tensor or a list of Tensors (one per experience)
        containing the patterns of the test sets.
    :param test_data_y: A Tensor or a list of Tensors or int lists containing
        the labels of the patterns of the test sets. Must contain the same
        number of elements as ``test_data_x``.
    :param task_labels: A list of task labels. Must contain the same number of
        elements as the ``train_data_x`` parameter. For
        Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually
        a list of zeros. For Multi-Task scenarios, this is usually a list of
        ascending task labels (starting from 0).
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the scenario. This means that the ``test_data_x`` and
        ``test_data_y`` parameters must be lists with a single element
        (the complete test set). Defaults to False, which means that
        ``train_data_x`` and ``test_data_x`` must contain the same
        number of experiences.
    :param train_transform: The transformation to apply to training patterns.
        Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param test_transform: The transformation to apply to test patterns.
        Defaults to None.
    :param test_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.

    :returns: A :class:`GenericCLScenario` instance.
    """

    if len(train_data_x) != len(train_data_y):
        raise ValueError('train_data_x and train_data_y must contain'
                         ' the same amount of elements')

    if type(test_data_x) != type(test_data_y):
        raise ValueError('test_data_x and test_data_y must be of'
                         ' the same type')

    if isinstance(test_data_x, Tensor):
        test_data_x = [test_data_x]
        test_data_y = [test_data_y]
    else:
        if len(test_data_x) != len(test_data_y):
            raise ValueError('test_data_x and test_data_y must contain'
                             ' the same amount of elements')

    transform_groups = dict(train=(train_transform, train_target_transform),
                            test=(test_transform, test_target_transform))

    train_datasets = [
        AvalancheTensorDataset(dataset_x,
                               dataset_y,
                               transform_groups=transform_groups,
                               initial_transform_group='train')
        for dataset_x, dataset_y in zip(train_data_x, train_data_y)
    ]

    test_datasets = [
        AvalancheTensorDataset(dataset_x,
                               dataset_y,
                               transform_groups=transform_groups,
                               initial_transform_group='test')
        for dataset_x, dataset_y in zip(test_data_x, test_data_y)
    ]

    return create_multi_dataset_generic_scenario(
        train_datasets,
        test_datasets,
        task_labels,
        complete_test_set_only=complete_test_set_only)
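A minimal usage sketch of the helper above (shapes illustrative); x and y are passed as parallel per-experience lists rather than per-experience tuples:

import torch

train_x = [torch.rand(50, 3, 28, 28) for _ in range(2)]
train_y = [torch.randint(0, 10, (50,)) for _ in range(2)]
test_x = [torch.rand(20, 3, 28, 28) for _ in range(2)]
test_y = [torch.randint(0, 10, (20,)) for _ in range(2)]
scenario = create_generic_scenario_from_tensors(
    train_x, train_y, test_x, test_y, task_labels=[0, 1])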
Example #15
    def test_lazy_benchmark_with_validation_stream_fixed_size(self):
        pattern_shape = (3, 32, 32)

        # Definition of training experiences
        # Experience 1
        experience_1_x = torch.zeros(100, *pattern_shape)
        experience_1_y = torch.zeros(100, dtype=torch.long)
        experience_1_dataset = AvalancheTensorDataset(experience_1_x,
                                                      experience_1_y)

        # Experience 2
        experience_2_x = torch.zeros(80, *pattern_shape)
        experience_2_y = torch.ones(80, dtype=torch.long)
        experience_2_dataset = AvalancheTensorDataset(experience_2_x,
                                                      experience_2_y)

        # Test experience
        test_x = torch.zeros(50, *pattern_shape)
        test_y = torch.zeros(50, dtype=torch.long)
        experience_test = AvalancheTensorDataset(test_x, test_y)

        def train_gen():
            # Lazy generator of the training stream
            for dataset in [experience_1_dataset, experience_2_dataset]:
                yield dataset

        def test_gen():
            # Lazy generator of the test stream
            for dataset in [experience_test]:
                yield dataset

        initial_benchmark_instance = create_lazy_generic_benchmark(
            train_generator=LazyStreamDefinition(train_gen(), 2, [0, 0]),
            test_generator=LazyStreamDefinition(test_gen(), 1, [0]),
            complete_test_set_only=True,
            dataset_type=AvalancheDatasetType.CLASSIFICATION)

        valid_benchmark = benchmark_with_validation_stream(
            initial_benchmark_instance, 20, shuffle=False)

        self.assertEqual(2, len(valid_benchmark.train_stream))
        self.assertEqual(2, len(valid_benchmark.valid_stream))
        self.assertEqual(1, len(valid_benchmark.test_stream))
        self.assertTrue(valid_benchmark.complete_test_set_only)

        self.assertEqual(80, len(valid_benchmark.train_stream[0].dataset))
        self.assertEqual(60, len(valid_benchmark.train_stream[1].dataset))
        self.assertEqual(20, len(valid_benchmark.valid_stream[0].dataset))
        self.assertEqual(20, len(valid_benchmark.valid_stream[1].dataset))

        self.assertTrue(
            torch.equal(experience_1_x[:80],
                        valid_benchmark.train_stream[0].dataset[:][0]))

        self.assertTrue(
            torch.equal(experience_2_x[:60],
                        valid_benchmark.train_stream[1].dataset[:][0]))

        self.assertTrue(
            torch.equal(experience_1_y[:80],
                        valid_benchmark.train_stream[0].dataset[:][1]))

        self.assertTrue(
            torch.equal(experience_2_y[:60],
                        valid_benchmark.train_stream[1].dataset[:][1]))

        self.assertTrue(
            torch.equal(experience_1_x[80:],
                        valid_benchmark.valid_stream[0].dataset[:][0]))

        self.assertTrue(
            torch.equal(experience_2_x[60:],
                        valid_benchmark.valid_stream[1].dataset[:][0]))

        self.assertTrue(
            torch.equal(experience_1_y[80:],
                        valid_benchmark.valid_stream[0].dataset[:][1]))

        self.assertTrue(
            torch.equal(experience_2_y[60:],
                        valid_benchmark.valid_stream[1].dataset[:][1]))

        self.assertTrue(
            torch.equal(test_x, valid_benchmark.test_stream[0].dataset[:][0]))

        self.assertTrue(
            torch.equal(test_y, valid_benchmark.test_stream[0].dataset[:][1]))
Example #16
def CTrL(
    stream_name: str,
    save_to_disk: bool = False,
    path: Path = default_dataset_location(""),
    seed: int = None,
    n_tasks: int = None,
):
    """
    Gives access to the Continual Transfer Learning benchmark streams
    introduced in https://arxiv.org/abs/2012.12631.
    :param stream_name: Name of the stream to generate. Must be one of
    `s_plus`, `s_minus`, `s_in`, `s_out`, `s_pl` and `s_long`.
    :param save_to_disk: Whether to save each stream on disk or load
    everything in memory. Setting it to `True` will save memory but take more
    time on the first generation using the corresponding seed.
    :param path: The path under which the generated stream will be saved if
    save_to_disk is True.
    :param seed: The seed used to generate the streams. If no seed is given,
    a random one will be drawn; reusing the same seed reproduces the same
    stream.
    :param n_tasks: The number of tasks to generate. This parameter is only
    relevant for the `s_long` stream, as all other streams have a fixed number
    of tasks.
    :return: A scenario containing 3 streams: train, val and test.
    """
    seed = seed or random.randint(0, sys.maxsize)
    if stream_name != "s_long" and n_tasks is not None:
        raise ValueError("The n_tasks parameter can only be used with the "
                         f'"s_long" stream (got n_tasks={n_tasks} for '
                         f'"{stream_name}")')
    elif stream_name == "s_long" and n_tasks is None:
        n_tasks = 100

    stream = ctrl.get_stream(stream_name, seed)

    if save_to_disk:
        folder = path / "ctrl" / stream_name / f"seed_{seed}"

    # Train, val and test experiences
    exps = [[], [], []]
    for t_id, t in enumerate(tqdm(stream, desc=f"Loading {stream_name}")):
        trans = transforms.Normalize(t.statistics["mean"], t.statistics["std"])
        for split, split_name, exp in zip(t.datasets, t.split_names, exps):
            samples, labels = split.tensors
            task_labels = [t.id] * samples.size(0)
            if save_to_disk:
                exp_folder = folder / f"exp_{t_id}" / split_name
                exp_folder.mkdir(parents=True, exist_ok=True)
                files = []
                for i, (sample, label) in enumerate(zip(samples, labels)):
                    sample_path = exp_folder / f"sample_{i}.png"
                    if not sample_path.exists():
                        F.to_pil_image(sample).save(sample_path)
                    files.append((sample_path, label.item()))

                common_root, exp_paths_list = common_paths_root(files)
                paths_dataset = PathsDataset(common_root, exp_paths_list)
                dataset = AvalancheDataset(
                    paths_dataset,
                    task_labels=task_labels,
                    transform=transforms.Compose(
                        [transforms.ToTensor(), trans]),
                )
            else:
                dataset = AvalancheTensorDataset(
                    samples,
                    labels.squeeze(1),
                    task_labels=task_labels,
                    transform=trans,
                )
            exp.append(dataset)
        if stream_name == "s_long" and t_id == n_tasks - 1:
            break

    return dataset_benchmark(
        train_datasets=exps[0],
        test_datasets=exps[2],
        other_streams_datasets=dict(val=exps[1]),
    )
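A minimal usage sketch of CTrL (stream names as listed in the docstring; the fixed seed makes the generated stream reproducible):

benchmark = CTrL(stream_name="s_minus", seed=1234)
for experience in benchmark.train_stream:
    print(experience.current_experience,
          experience.classes_in_this_experience)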
Example #17
    def test_classes_in_exp(self):
        train_exps = []

        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 70, (200, ))
        tensor_t = torch.randint(0, 5, (200, ))
        train_exps.append(
            AvalancheTensorDataset(tensor_x, tensor_y, task_labels=tensor_t))

        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 100, (200, ))
        tensor_t = torch.randint(0, 5, (200, ))
        train_exps.append(
            AvalancheTensorDataset(tensor_x, tensor_y, task_labels=tensor_t))

        test_exps = []
        test_x = torch.rand(200, 3, 28, 28)
        test_y = torch.randint(100, 200, (200, ))
        test_t = torch.randint(0, 5, (200, ))
        test_exps.append(
            AvalancheTensorDataset(test_x, test_y, task_labels=test_t))

        other_stream_exps = []
        other_x = torch.rand(200, 3, 28, 28)
        other_y = torch.randint(400, 600, (200, ))
        other_t = torch.randint(0, 5, (200, ))
        other_stream_exps.append(
            AvalancheTensorDataset(other_x, other_y, task_labels=other_t))

        benchmark_instance = dataset_benchmark(
            train_datasets=train_exps,
            test_datasets=test_exps,
            other_streams_datasets={'other': other_stream_exps})

        train_0_classes = benchmark_instance.classes_in_experience['train'][0]
        train_1_classes = benchmark_instance.classes_in_experience['train'][1]
        train_0_classes_min = min(train_0_classes)
        train_1_classes_min = min(train_1_classes)
        train_0_classes_max = max(train_0_classes)
        train_1_classes_max = max(train_1_classes)
        self.assertGreaterEqual(train_0_classes_min, 0)
        self.assertLess(train_0_classes_max, 70)
        self.assertGreaterEqual(train_1_classes_min, 0)
        self.assertLess(train_1_classes_max, 100)

        # Test deprecated behavior
        train_0_classes = benchmark_instance.classes_in_experience[0]
        train_1_classes = benchmark_instance.classes_in_experience[1]
        train_0_classes_min = min(train_0_classes)
        train_1_classes_min = min(train_1_classes)
        train_0_classes_max = max(train_0_classes)
        train_1_classes_max = max(train_1_classes)
        self.assertGreaterEqual(train_0_classes_min, 0)
        self.assertLess(train_0_classes_max, 70)
        self.assertGreaterEqual(train_1_classes_min, 0)
        self.assertLess(train_1_classes_max, 100)
        # End test deprecated behavior

        test_0_classes = benchmark_instance.classes_in_experience['test'][0]
        test_0_classes_min = min(test_0_classes)
        test_0_classes_max = max(test_0_classes)
        self.assertGreaterEqual(test_0_classes_min, 100)
        self.assertLess(test_0_classes_max, 200)

        other_0_classes = benchmark_instance.classes_in_experience['other'][0]
        other_0_classes_min = min(other_0_classes)
        other_0_classes_max = max(other_0_classes)
        self.assertGreaterEqual(other_0_classes_min, 400)
        self.assertLess(other_0_classes_max, 600)
Example #18
    def setUpClass(cls) -> None:
        torch.manual_seed(0)
        np.random.seed(0)
        random.seed(0)

        n_samples_per_class = 100
        datasets = []
        for i in range(3):
            dataset = make_classification(n_samples=3 * n_samples_per_class,
                                          n_classes=3,
                                          n_features=3,
                                          n_informative=3,
                                          n_redundant=0)
            X = torch.from_numpy(dataset[0]).float()
            y = torch.from_numpy(dataset[1]).long()
            train_X, test_X, train_y, test_y = train_test_split(X,
                                                                y,
                                                                train_size=0.5,
                                                                shuffle=True,
                                                                stratify=y)
            datasets.append((train_X, train_y, test_X, test_y))

        tr_ds = [
            AvalancheTensorDataset(
                tr_X,
                tr_y,
                dataset_type=AvalancheDatasetType.CLASSIFICATION,
                task_labels=torch.randint(0, 3, (150, )).tolist())
            for tr_X, tr_y, _, _ in datasets
        ]
        ts_ds = [
            AvalancheTensorDataset(
                ts_X,
                ts_y,
                dataset_type=AvalancheDatasetType.CLASSIFICATION,
                task_labels=torch.randint(0, 3, (150, )).tolist())
            for _, _, ts_X, ts_y in datasets
        ]
        benchmark = dataset_benchmark(train_datasets=tr_ds,
                                      test_datasets=ts_ds)
        model = SimpleMLP(num_classes=3, input_size=3)

        f = open('log.txt', 'w')
        text_logger = TextLogger(f)
        eval_plugin = EvaluationPlugin(
            accuracy_metrics(minibatch=True,
                             epoch=True,
                             epoch_running=True,
                             experience=True,
                             stream=True,
                             trained_experience=True),
            loss_metrics(minibatch=True,
                         epoch=True,
                         epoch_running=True,
                         experience=True,
                         stream=True),
            forgetting_metrics(experience=True, stream=True),
            confusion_matrix_metrics(num_classes=3,
                                     save_image=False,
                                     normalize='all',
                                     stream=True),
            bwt_metrics(experience=True, stream=True),
            forward_transfer_metrics(experience=True, stream=True),
            cpu_usage_metrics(minibatch=True,
                              epoch=True,
                              epoch_running=True,
                              experience=True,
                              stream=True),
            timing_metrics(minibatch=True,
                           epoch=True,
                           epoch_running=True,
                           experience=True,
                           stream=True),
            ram_usage_metrics(every=0.5,
                              minibatch=True,
                              epoch=True,
                              experience=True,
                              stream=True),
            disk_usage_metrics(minibatch=True,
                               epoch=True,
                               experience=True,
                               stream=True),
            MAC_metrics(minibatch=True, epoch=True, experience=True),
            loggers=[text_logger],
            collect_all=True)  # collect all metrics (set to True by default)
        cl_strategy = BaseStrategy(model,
                                   SGD(model.parameters(),
                                       lr=0.001,
                                       momentum=0.9),
                                   CrossEntropyLoss(),
                                   train_mb_size=2,
                                   train_epochs=2,
                                   eval_mb_size=2,
                                   device=DEVICE,
                                   evaluator=eval_plugin,
                                   eval_every=1)
        for i, experience in enumerate(benchmark.train_stream):
            cl_strategy.train(experience,
                              eval_streams=[benchmark.test_stream],
                              shuffle=False)
            cl_strategy.eval(benchmark.test_stream)
        cls.all_metrics = cl_strategy.evaluator.get_all_metrics()
        f.close()
        # # Uncomment me to regenerate the reference metrics. Make sure
        # # the old tests were passing for all unchanged metrics
        # with open(os.path.join(pathlib.Path(__file__).parent.absolute(),
        #                        'target_metrics',
        #                        'tpp.pickle'), 'wb') as f:
        #     pickle.dump(dict(cls.all_metrics), f,
        #                 protocol=4)
        with open(
                os.path.join(
                    pathlib.Path(__file__).parent.absolute(), 'target_metrics',
                    'tpp.pickle'), 'rb') as f:
            cls.ref = pickle.load(f)
Example #19
    def test_custom_streams_name_and_length(self):

        train_exps = []
        test_exps = []
        valid_exps = []

        for _ in range(5):
            tensor_x = torch.rand(200, 3, 28, 28)
            tensor_y = torch.randint(0, 100, (200,))
            tensor_t = torch.randint(0, 5, (200,))
            train_exps.append(
                AvalancheTensorDataset(tensor_x, tensor_y, task_labels=tensor_t)
            )

        for _ in range(3):
            tensor_x = torch.rand(150, 3, 28, 28)
            tensor_y = torch.randint(0, 100, (150,))
            tensor_t = torch.randint(0, 3, (150,))
            test_exps.append(
                AvalancheTensorDataset(tensor_x, tensor_y, task_labels=tensor_t)
            )

        for _ in range(4):
            tensor_x = torch.rand(220, 3, 28, 28)
            tensor_y = torch.randint(0, 100, (220,))
            tensor_t = torch.randint(0, 5, (220,))
            valid_exps.append(
                AvalancheTensorDataset(tensor_x, tensor_y, task_labels=tensor_t)
            )

        valid_origin_dataset = AvalancheTensorDataset(
            torch.ones(10, 3, 32, 32), torch.zeros(10)
        )

        valid_t_labels = [{9}, {4, 5}, {7, 8}, {0}, {3}]

        with self.assertRaises(Exception):
            benchmark_instance = GenericCLScenario(
                stream_definitions={
                    "train": (train_exps,),
                    "test": (test_exps,),
                    "valid": (valid_exps, valid_t_labels, valid_origin_dataset),
                }
            )

        valid_t_labels = valid_t_labels[:-1]

        benchmark_instance = GenericCLScenario(
            stream_definitions={
                "train": (train_exps,),
                "test": (test_exps,),
                "valid": (valid_exps, valid_t_labels, valid_origin_dataset),
            }
        )

        self.assertEqual(5, len(benchmark_instance.train_stream))
        self.assertEqual(3, len(benchmark_instance.test_stream))
        self.assertEqual(4, len(benchmark_instance.valid_stream))

        self.assertEqual(None, benchmark_instance.original_train_dataset)
        self.assertEqual(None, benchmark_instance.original_test_dataset)
        self.assertEqual(
            valid_origin_dataset, benchmark_instance.original_valid_dataset
        )

        for i, exp in enumerate(benchmark_instance.train_stream):
            expect_x, expect_y, expect_t = train_exps[i][0]
            got_x, got_y, got_t = exp.dataset[0]

            self.assertTrue(torch.equal(expect_x, got_x))
            self.assertTrue(torch.equal(expect_y, got_y))
            self.assertEqual(int(expect_t), got_t)

            exp_t_labels = set(exp.task_labels)
            self.assertLess(max(exp_t_labels), 5)
            self.assertGreaterEqual(min(exp_t_labels), 0)

        for i, exp in enumerate(benchmark_instance.test_stream):
            expect_x, expect_y, expect_t = test_exps[i][0]
            got_x, got_y, got_t = exp.dataset[0]

            self.assertTrue(torch.equal(expect_x, got_x))
            self.assertTrue(torch.equal(expect_y, got_y))
            self.assertEqual(int(expect_t), got_t)

            exp_t_labels = set(exp.task_labels)
            self.assertLess(max(exp_t_labels), 3)
            self.assertGreaterEqual(min(exp_t_labels), 0)

        for i, exp in enumerate(benchmark_instance.valid_stream):
            expect_x, expect_y, expect_t = valid_exps[i][0]
            got_x, got_y, got_t = exp.dataset[0]

            self.assertTrue(torch.equal(expect_x, got_x))
            self.assertTrue(torch.equal(expect_y, got_y))
            self.assertEqual(int(expect_t), got_t)

            exp_t_labels = set(exp.task_labels)

            self.assertEqual(valid_t_labels[i], exp_t_labels)
Example #20
    def test_data_incremental_benchmark_from_lazy_benchmark(self):
        pattern_shape = (3, 32, 32)

        # Definition of training experiences
        # Experience 1
        experience_1_x = torch.zeros(100, *pattern_shape)
        experience_1_y = torch.zeros(100, dtype=torch.long)
        experience_1_dataset = AvalancheTensorDataset(experience_1_x,
                                                      experience_1_y)

        # Experience 2
        experience_2_x = torch.zeros(80, *pattern_shape)
        experience_2_y = torch.ones(80, dtype=torch.long)
        experience_2_dataset = AvalancheTensorDataset(experience_2_x,
                                                      experience_2_y)

        # Test experience
        test_x = torch.zeros(50, *pattern_shape)
        test_y = torch.zeros(50, dtype=torch.long)
        experience_test = AvalancheTensorDataset(test_x, test_y)

        def train_gen():
            # Lazy generator of the training stream
            for dataset in [experience_1_dataset, experience_2_dataset]:
                yield dataset

        def test_gen():
            # Lazy generator of the test stream
            for dataset in [experience_test]:
                yield dataset

        initial_benchmark_instance = create_lazy_generic_benchmark(
            train_generator=LazyStreamDefinition(train_gen(), 2, [0, 0]),
            test_generator=LazyStreamDefinition(test_gen(), 1, [0]),
            complete_test_set_only=True,
            dataset_type=AvalancheDatasetType.CLASSIFICATION,
        )

        data_incremental_instance = data_incremental_benchmark(
            initial_benchmark_instance, 12, shuffle=False, drop_last=False)

        self.assertEqual(16, len(data_incremental_instance.train_stream))
        self.assertEqual(1, len(data_incremental_instance.test_stream))
        self.assertTrue(data_incremental_instance.complete_test_set_only)

        tensor_idx = 0
        ref_tensor_x = experience_1_x
        ref_tensor_y = experience_1_y
        for exp in data_incremental_instance.train_stream:
            if exp.current_experience == 8:
                # Last mini-exp from 1st exp
                self.assertEqual(4, len(exp.dataset))
            elif exp.current_experience == 15:
                # Last mini-exp from 2nd exp
                self.assertEqual(8, len(exp.dataset))
            else:
                # Other mini-exp
                self.assertEqual(12, len(exp.dataset))

            if tensor_idx >= 100:
                ref_tensor_x = experience_2_x
                ref_tensor_y = experience_2_y
                tensor_idx = 0

            for x, y, *_ in exp.dataset:
                self.assertTrue(torch.equal(ref_tensor_x[tensor_idx], x))
                self.assertTrue(torch.equal(ref_tensor_y[tensor_idx], y))
                tensor_idx += 1

        exp = data_incremental_instance.test_stream[0]
        self.assertEqual(50, len(exp.dataset))

        tensor_idx = 0
        for x, y, *_ in exp.dataset:
            self.assertTrue(torch.equal(test_x[tensor_idx], x))
            self.assertTrue(torch.equal(test_y[tensor_idx], y))
            tensor_idx += 1
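The mini-experience counts asserted above are plain ceil-division of each experience length by the mini-experience size of 12 (with drop_last=False keeping the remainders). A quick check:

import math

sizes = [100, 80]            # lengths of the two training experiences
mini_exps = [math.ceil(s / 12) for s in sizes]  # [9, 7]
assert sum(mini_exps) == 16
assert 100 - 8 * 12 == 4     # last mini-exp of experience 1 (index 8)
assert 80 - 6 * 12 == 8      # last mini-exp of experience 2 (index 15)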