def test_avalanche_subset_mixed_task_labels(self):
        dataset_mnist = MNIST('./data/mnist', download=True)
        x, y = dataset_mnist[1000]
        x2, y2 = dataset_mnist[1007]

        full_task_labels = [1] * len(dataset_mnist)
        full_task_labels[1000] = 2
        # First, test by passing len(task_labels) == len(dataset_mnist)
        dataset = AvalancheSubset(dataset_mnist,
                                  indices=[1000, 1007],
                                  task_labels=full_task_labels)

        x3, y3, t3 = dataset[0]
        x4, y4, t4 = dataset[1]
        self.assertEqual(y, y3)
        self.assertEqual(2, t3)
        self.assertEqual(y2, y4)
        self.assertEqual(1, t4)

        # Secondly, test by passing len(task_labels) == len(indices)
        dataset = AvalancheSubset(dataset_mnist,
                                  indices=[1000, 1007],
                                  task_labels=[3, 5])

        x3, y3, t3 = dataset[0]
        x4, y4, t4 = dataset[1]
        self.assertEqual(y, y3)
        self.assertEqual(3, t3)
        self.assertEqual(y2, y4)
        self.assertEqual(5, t4)
Example #2
    def test_sit_multi_dataset_merge(self):
        split_mapping = [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
        mnist_train = MNIST(root=expanduser("~") + "/.avalanche/data/mnist/",
                            train=True, download=True)
        mnist_test = MNIST(root=expanduser("~") + "/.avalanche/data/mnist/",
                           train=False, download=True)

        train_part1 = make_nc_transformation_subset(
            mnist_train, None, None, range(5))
        train_part2 = make_nc_transformation_subset(
            mnist_train, None, None, range(5, 10))
        train_part2 = AvalancheSubset(
            train_part2, class_mapping=split_mapping)

        test_part1 = make_nc_transformation_subset(
            mnist_test, None, None, range(5))
        test_part2 = make_nc_transformation_subset(
            mnist_test, None, None, range(5, 10))
        test_part2 = AvalancheSubset(test_part2,
                                     class_mapping=split_mapping)
        my_nc_scenario = nc_scenario(
            [train_part1, train_part2], [test_part1, test_part2], 5,
            task_labels=False, shuffle=True, seed=1234)

        self.assertEqual(5, my_nc_scenario.n_experiences)
        self.assertEqual(10, my_nc_scenario.n_classes)
        for batch_id in range(5):
            self.assertEqual(
                2, len(my_nc_scenario.classes_in_experience[batch_id]))

        all_classes = set()
        for batch_id in range(5):
            all_classes.update(my_nc_scenario.classes_in_experience[batch_id])

        self.assertEqual(10, len(all_classes))
Example #3
    def construct_exemplar_set(self, strategy: SupervisedTemplate):
        tid = strategy.clock.train_exp_counter
        benchmark = strategy.experience.benchmark
        nb_cl = benchmark.n_classes_per_exp[tid]
        previous_seen_classes = sum(benchmark.n_classes_per_exp[:tid])

        if self.fixed_memory:
            nb_protos_cl = int(
                ceil(self.memory_size / len(self.observed_classes)))
        else:
            nb_protos_cl = self.memory_size
        new_classes = self.observed_classes[
            previous_seen_classes:previous_seen_classes + nb_cl]

        dataset = strategy.experience.dataset
        targets = torch.tensor(dataset.targets)
        for iter_dico in range(nb_cl):
            cd = AvalancheSubset(
                dataset,
                torch.where(targets == new_classes[iter_dico])[0])

            class_patterns, _, _ = next(
                iter(DataLoader(cd.eval(), batch_size=len(cd))))
            class_patterns = class_patterns.to(strategy.device)

            with torch.no_grad():
                mapped_prototypes = strategy.model.feature_extractor(
                    class_patterns).detach()
            D = mapped_prototypes.T
            D = D / torch.norm(D, dim=0)

            mu = torch.mean(D, dim=1)
            order = torch.zeros(class_patterns.shape[0])
            w_t = mu

            i, added, selected = 0, 0, []
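            # Herding selection (iCaRL-style): at each step pick the
            # exemplar that keeps the mean of the selected features
            # closest to the class mean mu; w_t is the residual mean.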
            while not added == nb_protos_cl and i < 1000:
                tmp_t = torch.mm(w_t.unsqueeze(0), D)
                ind_max = torch.argmax(tmp_t)

                if ind_max not in selected:
                    order[ind_max] = 1 + added
                    added += 1
                    selected.append(ind_max.item())

                w_t = w_t + mu - D[:, ind_max]
                i += 1

            # order stores 1-based selection ranks (0 = never selected);
            # keep only the first nb_protos_cl selected exemplars.
            pick = (order > 0) * (order < nb_protos_cl + 1) * 1.0
            self.x_memory.append(class_patterns[torch.where(pick == 1)[0]])
            self.y_memory.append([new_classes[iter_dico]] *
                                 len(torch.where(pick == 1)[0]))
            self.order.append(order[torch.where(pick == 1)[0]])
Example #4
    def test_sit_multi_dataset_one_batch_per_set(self):
        split_mapping = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6]
        mnist_train = MNIST(
            root=expanduser("~") + "/.avalanche/data/mnist/",
            train=True,
            download=True,
        )
        mnist_test = MNIST(
            root=expanduser("~") + "/.avalanche/data/mnist/",
            train=False,
            download=True,
        )

        train_part1 = make_nc_transformation_subset(mnist_train, None, None,
                                                    range(3))
        train_part2 = make_nc_transformation_subset(mnist_train, None, None,
                                                    range(3, 10))
        train_part2 = AvalancheSubset(train_part2, class_mapping=split_mapping)

        test_part1 = make_nc_transformation_subset(mnist_test, None, None,
                                                   range(3))
        test_part2 = make_nc_transformation_subset(mnist_test, None, None,
                                                   range(3, 10))
        test_part2 = AvalancheSubset(test_part2, class_mapping=split_mapping)
        my_nc_benchmark = nc_benchmark(
            [train_part1, train_part2],
            [test_part1, test_part2],
            2,
            task_labels=False,
            shuffle=True,
            seed=1234,
            one_dataset_per_exp=True,
        )

        self.assertEqual(2, my_nc_benchmark.n_experiences)
        self.assertEqual(10, my_nc_benchmark.n_classes)

        all_classes = set()
        for batch_id in range(2):
            all_classes.update(
                my_nc_benchmark.classes_in_experience["train"][batch_id])

        self.assertEqual(10, len(all_classes))

        self.assertTrue(
            (my_nc_benchmark.classes_in_experience["train"][0] == {0, 1, 2}
             and my_nc_benchmark.classes_in_experience["train"][1] == set(
                 range(3, 10))) or
            (my_nc_benchmark.classes_in_experience["train"][0] == set(
                range(3, 10)) and
             my_nc_benchmark.classes_in_experience["train"][1] == {0, 1, 2}))
Example #5
    def test_sit_multi_dataset_merge(self):
        split_mapping = [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
        mnist_train = MNIST(
            root=default_dataset_location("mnist"),
            train=True,
            download=True,
        )
        mnist_test = MNIST(
            root=default_dataset_location("mnist"),
            train=False,
            download=True,
        )

        train_part1 = make_nc_transformation_subset(
            mnist_train, None, None, range(5)
        )
        train_part2 = make_nc_transformation_subset(
            mnist_train, None, None, range(5, 10)
        )
        train_part2 = AvalancheSubset(train_part2, class_mapping=split_mapping)

        test_part1 = make_nc_transformation_subset(
            mnist_test, None, None, range(5)
        )
        test_part2 = make_nc_transformation_subset(
            mnist_test, None, None, range(5, 10)
        )
        test_part2 = AvalancheSubset(test_part2, class_mapping=split_mapping)
        my_nc_benchmark = nc_benchmark(
            [train_part1, train_part2],
            [test_part1, test_part2],
            5,
            task_labels=False,
            shuffle=True,
            seed=1234,
        )

        self.assertEqual(5, my_nc_benchmark.n_experiences)
        self.assertEqual(10, my_nc_benchmark.n_classes)
        for batch_id in range(5):
            self.assertEqual(
                2, len(my_nc_benchmark.classes_in_experience["train"][batch_id])
            )

        all_classes = set()
        for batch_id in range(5):
            all_classes.update(
                my_nc_benchmark.classes_in_experience["train"][batch_id]
            )

        self.assertEqual(10, len(all_classes))
Example #6
    def resize(self, strategy, new_size):
        """Update the maximum size of the buffer."""
        self.max_size = new_size
        if len(self.buffer) <= self.max_size:
            return
        # The buffer is kept sorted by descending weight (see
        # update_from_dataset), so truncating to the first max_size
        # indices keeps the highest-weight samples.
        self.buffer = AvalancheSubset(self.buffer, torch.arange(self.max_size))
        self._buffer_weights = self._buffer_weights[:self.max_size]
Example #7
    def test_avalanche_subset_task_labels_inheritance(self):
        dataset_mnist = MNIST('./data/mnist', download=True)
        random_task_labels = [
            random.randint(0, 10) for _ in range(len(dataset_mnist))
        ]
        dataset_orig = AvalancheDataset(dataset_mnist,
                                        transform=ToTensor(),
                                        task_labels=random_task_labels)

        dataset_child = AvalancheSubset(dataset_orig, indices=[1000, 1007])
        _, _, t2 = dataset_orig[1000]
        _, _, t5 = dataset_orig[1007]
        _, _, t3 = dataset_child[0]
        _, _, t6 = dataset_child[1]

        self.assertEqual(random_task_labels[1000], t2)
        self.assertEqual(random_task_labels[1007], t5)
        self.assertEqual(random_task_labels[1000], t3)
        self.assertEqual(random_task_labels[1007], t6)

        self.assertListEqual(random_task_labels,
                             list(dataset_orig.targets_task_labels))

        self.assertListEqual(
            [random_task_labels[1000], random_task_labels[1007]],
            list(dataset_child.targets_task_labels))
Example #8
    def create_sub_experience_list(
            self, experience: CLExperience) -> List[CLExperience]:
        """Creates a list of sub-experiences from an experience.
        It returns a list of experiences, where each experience is
        a subset of the original experience.

        :param experience: single Experience.

        :return: list of Experience.
        """

        # Shuffle the indices
        indices = torch.randperm(len(experience.dataset))
        num_sub_exps = len(indices) // self.train_mb_size
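        # Note: the remainder (len(indices) % train_mb_size samples) is
        # dropped, so each sub-experience contains exactly train_mb_size
        # samples.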

        sub_experience_list = []
        for subexp_id in range(num_sub_exps):
            subexp_indices = indices[
                subexp_id * self.train_mb_size:
                (subexp_id + 1) * self.train_mb_size]
            sub_experience = copy.copy(experience)
            subexp_ds = AvalancheSubset(sub_experience.dataset,
                                        indices=subexp_indices)
            sub_experience.dataset = subexp_ds
            sub_experience_list.append(sub_experience)

        return sub_experience_list
Example #9
    def test_avalanche_subset_collate_fn_inheritance(self):
        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 100, (200, ))
        tensor_z = torch.randint(0, 100, (200, ))

        def my_collate_fn(patterns):
            x_values = torch.stack([pat[0] for pat in patterns], 0)
            y_values = torch.tensor([pat[1] for pat in patterns]) + 1
            z_values = torch.tensor([-1 for _ in patterns])
            t_values = torch.tensor([pat[3] for pat in patterns])
            return x_values, y_values, z_values, t_values

        def my_collate_fn2(patterns):
            x_values = torch.stack([pat[0] for pat in patterns], 0)
            y_values = torch.tensor([pat[1] for pat in patterns]) + 2
            z_values = torch.tensor([-2 for _ in patterns])
            t_values = torch.tensor([pat[3] for pat in patterns])
            return x_values, y_values, z_values, t_values

        whole_dataset = TensorDataset(tensor_x, tensor_y, tensor_z)
        dataset = AvalancheDataset(whole_dataset, collate_fn=my_collate_fn)
        inherited = AvalancheSubset(dataset,
                                    indices=list(range(5, 150)),
                                    collate_fn=my_collate_fn2)  # Ok

        x, y, z, t = inherited[0:5]
        self.assertIsInstance(x, Tensor)
        self.assertTrue(torch.equal(tensor_x[5:10], x))
        self.assertTrue(torch.equal(tensor_y[5:10] + 2, y))
        self.assertTrue(torch.equal(torch.full((5, ), -2, dtype=torch.long),
                                    z))
        self.assertTrue(torch.equal(torch.zeros(5, dtype=torch.long), t))

        classification_dataset = AvalancheDataset(
            whole_dataset, dataset_type=AvalancheDatasetType.CLASSIFICATION)

        with self.assertRaises(ValueError):
            bad_inherited = AvalancheSubset(classification_dataset,
                                            indices=list(range(5, 150)),
                                            collate_fn=my_collate_fn)
        ok_inherited_classification = AvalancheSubset(classification_dataset,
                                                      indices=list(
                                                          range(5, 150)))
        self.assertEqual(AvalancheDatasetType.CLASSIFICATION,
                         ok_inherited_classification.dataset_type)
Example #10
    def select(self, dataset: AvalancheSubset, model: FeatureBasedModule,
               m: int) -> AvalancheSubset:
        model_device = next(model.parameters()).device
        features = cat([
            model.featurize(images.to(model_device))
            for images, *_ in DataLoader(dataset, batch_size=32)
        ])
        center = features.mean(dim=0)
        # Keep the m samples whose features are closest (squared L2
        # distance) to the class feature mean.
        distances = pow(features - center, 2).sum(dim=1)
        return AvalancheSubset(dataset, distances.argsort()[:m])
Example #11
    def test_transform_subset_transform(self):
        dataset_mnist = MNIST('./data/mnist', download=True)
        x, y = dataset_mnist[0]
        dataset = AvalancheSubset(dataset_mnist, transform=ToTensor())
        x2, y2, t2 = dataset[0]
        self.assertIsInstance(x2, Tensor)
        self.assertIsInstance(y2, int)
        self.assertIsInstance(t2, int)
        self.assertTrue(torch.equal(ToTensor()(x), x2))
        self.assertEqual(y, y2)
        self.assertEqual(0, t2)
Example #12
    def _split_by_class(self, data):
        # Get sample idxs per class
        class_idxs = {}
        for idx, target in enumerate(data.targets):
            if target not in class_idxs:
                class_idxs[target] = []
            class_idxs[target].append(idx)

        # Make AvalancheSubset per class
        new_groups = {}
        for c, c_idxs in class_idxs.items():
            new_groups[c] = AvalancheSubset(data, indices=c_idxs)
        return new_groups
Example #13
    def test_transform_subset_indices(self):
        dataset_mnist = MNIST('./data/mnist', download=True)
        x, y = dataset_mnist[1000]
        x2, y2 = dataset_mnist[1007]

        dataset = AvalancheSubset(dataset_mnist, indices=[1000, 1007])

        x3, y3, t3 = dataset[0]
        x4, y4, t4 = dataset[1]
        self.assertTrue(pil_images_equal(x, x3))
        self.assertEqual(y, y3)
        self.assertTrue(pil_images_equal(x2, x4))
        self.assertEqual(y2, y4)
        self.assertFalse(pil_images_equal(x, x4))
        self.assertFalse(pil_images_equal(x2, x3))
Example #14
    def __call__(self, strategy: "BaseStrategy", **kwargs):
        new_data = strategy.experience.dataset

        # Get sample idxs per class
        cl_idxs = {}
        for idx, target in enumerate(new_data.targets):
            if target not in cl_idxs:
                cl_idxs[target] = []
            cl_idxs[target].append(idx)

        # Make AvalancheSubset per class
        cl_datasets = {}
        for c, c_idxs in cl_idxs.items():
            cl_datasets[c] = AvalancheSubset(new_data, indices=c_idxs)

        # Update seen classes
        self.seen_classes.update(cl_datasets.keys())

        # how many experiences to divide the memory over
        div_cnt = len(self.seen_classes) if self.adaptive_size \
            else self.total_num_classes
        class_mem_size = self.mem_size // div_cnt
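        # e.g. with mem_size=200 and 10 classes seen so far (adaptive
        # size), each class gets 200 // 10 = 20 memory slots.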

        # Add current classes data to memory
        for c, c_mem in cl_datasets.items():
            if c in self.ext_mem:  # Merge data with previous seen data
                c_mem = AvalancheConcatDataset((c_mem, self.ext_mem[c]))
            sorted_indices = self.selection_strategy.make_sorted_indices(
                strategy, c_mem)
            self.ext_mem[c] = AvalancheSubset(c_mem, sorted_indices)

        # Distribute remaining samples using counts
        cutoff_per_exp = self.divide_remaining_samples(class_mem_size, div_cnt)

        # Use counts to remove samples from memory
        self.cutoff_memory(cutoff_per_exp)
Example #15
    def update_from_dataset(self, new_data: AvalancheDataset):
        """Update the buffer using the given dataset.

        :param new_data: the new dataset whose samples are candidates for
            insertion into the buffer.
        """
        new_weights = torch.rand(len(new_data))

        cat_weights = torch.cat([new_weights, self._buffer_weights])
        cat_data = AvalancheConcatDataset([new_data, self.buffer])
        sorted_weights, sorted_idxs = cat_weights.sort(descending=True)

        buffer_idxs = sorted_idxs[:self.max_size]
        self.buffer = AvalancheSubset(cat_data, buffer_idxs)
        self._buffer_weights = sorted_weights[:self.max_size]
Example #16
    def dataset(self) -> AvalancheDataset:
        if self._is_train():
            dataset = self.scenario.train_dataset
            patterns_indexes = \
                self.scenario.train_exps_patterns_assignment[
                    self.current_experience]
        else:
            dataset = self.scenario.test_dataset
            if self.scenario.complete_test_set_only:
                patterns_indexes = None
            else:
                patterns_indexes = self.scenario.test_exps_patterns_assignment[
                    self.current_experience]

        return AvalancheSubset(dataset, indices=patterns_indexes)
Example #17
    def test_transform_subset_mapping(self):
        dataset_mnist = MNIST('./data/mnist', download=True)
        _, y = dataset_mnist[1000]

        mapping = list(range(10))
        other_classes = list(mapping)
        other_classes.remove(y)

        swap_y = random.choice(other_classes)

        mapping[y] = swap_y
        mapping[swap_y] = y

        dataset = AvalancheSubset(dataset_mnist, class_mapping=mapping)

        _, y2, _ = dataset[1000]
        self.assertEqual(y2, swap_y)
Example #18
    def select(self, dataset: AvalancheSubset, model: FeatureBasedModule,
               m: int) -> AvalancheSubset:
        model_device = next(model.parameters()).device
        features = cat([
            model.featurize(images.to(model_device)).detach()
            for images, *_ in DataLoader(dataset, batch_size=32)
        ])
        center = features.mean(dim=0)
        current_center = center * 0
        indices = []
        for i in range(m):
            # Running center each sample would produce if selected next
            # (greedy herding step).
            candidate_centers = \
                current_center * i / (i + 1) + features / (i + 1)
            distances = pow(candidate_centers - center, 2).sum(dim=1)
            # Exclude already selected samples from the argmin.
            distances[indices] = inf
            indices.append(distances.argmin().tolist())
            current_center = candidate_centers[indices[-1]]
        return AvalancheSubset(dataset, indices)
Example #19
    def test_transform_subset_composition(self):
        dataset_mnist = MNIST('./data/mnist', download=True,
                              transform=RandomCrop(16))
        x, y = dataset_mnist[0]
        self.assertIsInstance(x, Image)
        self.assertEqual([x.width, x.height], [16, 16])
        self.assertIsInstance(y, int)

        dataset = AvalancheSubset(
            dataset_mnist, transform=ToTensor(),
            target_transform=lambda target: -1)

        x2, y2, t2 = dataset[0]
        self.assertIsInstance(x2, Tensor)
        self.assertEqual(x2.shape, (1, 16, 16))
        self.assertIsInstance(y2, int)
        self.assertIsInstance(t2, int)
        self.assertEqual(y2, -1)
        self.assertEqual(0, t2)
Example #20
def make_nc_transformation_subset(
    dataset: SupportedDataset,
    transform: Any,
    target_transform: Any,
    classes: Union[None, Sequence[int]],
    bucket_classes: bool = False,
    sort_classes: bool = False,
    sort_indexes: bool = False,
) -> AvalancheSubset:
    """
    Creates a subset given the list of classes the patterns should belong to.

    :param dataset: The original dataset
    :param transform: The transform function for patterns. Can be None.
    :param target_transform: The transform function for targets. Can be None.
    :param classes: A list of classes used to filter the dataset patterns.
        Patterns belonging to one of those classes will be included. If None,
        all patterns will be included.
    :param bucket_classes: If True, the final Dataset will output patterns by
        grouping them by class. Defaults to False.
    :param sort_classes: If ``bucket_classes`` and ``sort_classes`` are both
        True, the final Dataset will output patterns by grouping them by class
        and the class groups will be ordered by class ID (ascending). Ignored
        if ``bucket_classes`` is False. Defaults to False.
    :param sort_indexes: If True, pattern indexes will be sorted (ascending).
        When grouping by class, patterns will be sorted inside their respective
        class buckets. Defaults to False.

    :returns: An :class:`AvalancheSubset` that includes only patterns
        belonging to the given classes, in the order controlled by the
        ``bucket_classes``, ``sort_classes`` and ``sort_indexes`` parameters.
    """
    return AvalancheSubset(
        dataset,
        indices=_indexes_from_set(
            dataset.targets,
            classes,
            bucket_classes=bucket_classes,
            sort_classes=sort_classes,
            sort_indexes=sort_indexes,
        ),
        transform=transform,
        target_transform=target_transform,
    )
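
# A minimal usage sketch (not part of the original example): build a
# class-bucketed, class-sorted subset containing only MNIST digits 0-4.
mnist_train = MNIST('./data/mnist', download=True)
digits_0_4 = make_nc_transformation_subset(
    mnist_train, ToTensor(), None, range(5),
    bucket_classes=True, sort_classes=True)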
Example #21
def shrinking_experience_size_split_strategy(
        experience: Experience):

    experience_size = 1000 

    exp_dataset = experience.dataset
    exp_indices = list(range(len(exp_dataset)))

    result_datasets = []

    exp_indices = \
        torch.as_tensor(exp_indices)[
            torch.randperm(len(exp_indices))
        ].tolist()

    result_datasets.append(AvalancheSubset(
        exp_dataset, indices=exp_indices[0:experience_size]))

    return result_datasets
Example #22
    def update(self, strategy: "BaseStrategy", **kwargs):
        new_data = strategy.experience.dataset

        # Get sample idxs per class
        cl_idxs = {}
        for idx, target in enumerate(new_data.targets):
            if target not in cl_idxs:
                cl_idxs[target] = []
            cl_idxs[target].append(idx)

        # Make AvalancheSubset per class
        cl_datasets = {}
        for c, c_idxs in cl_idxs.items():
            cl_datasets[c] = AvalancheSubset(new_data, indices=c_idxs)

        # Update seen classes
        self.seen_classes.update(cl_datasets.keys())

        # associate lengths to classes
        lens = self.get_group_lengths(len(self.seen_classes))
        class_to_len = {}
        for class_id, ll in zip(self.seen_classes, lens):
            class_to_len[class_id] = ll

        # update buffers with new data
        for class_id, new_data_c in cl_datasets.items():
            ll = class_to_len[class_id]
            if class_id in self.buffer_groups:
                old_buffer_c = self.buffer_groups[class_id]
                old_buffer_c.update_from_dataset(new_data_c)
                old_buffer_c.resize(strategy, ll)
            else:
                new_buffer = ReservoirSamplingBuffer(ll)
                new_buffer.update_from_dataset(new_data_c)
                self.buffer_groups[class_id] = new_buffer

        # resize buffers
        for class_id, class_buf in self.buffer_groups.items():
            self.buffer_groups[class_id].resize(
                strategy, class_to_len[class_id]
            )
Example #23
    def resize(self, strategy, new_size: int):
        self.max_size = new_size
        idxs = self.selection_strategy.make_sorted_indices(strategy=strategy,
                                                           data=self.buffer)
        self.buffer = AvalancheSubset(self.buffer, idxs[:self.max_size])
Example #24
    def __init__(
            self,
            train_dataset: AvalancheDataset,
            test_dataset: AvalancheDataset,
            n_experiences: int,
            task_labels: bool = False,
            shuffle: bool = True,
            seed: Optional[int] = None,
            balance_experiences: bool = False,
            min_class_patterns_in_exp: int = 0,
            fixed_exp_assignment: Optional[Sequence[Sequence[int]]] = None,
            reproducibility_data: Optional[Dict[str, Any]] = None):
        """
        Creates a NIScenario instance given the training and test Datasets and
        the number of experiences.

        :param train_dataset: The training dataset. The dataset must be an
            instance of :class:`AvalancheDataset`. For instance, one can
            use the datasets from the torchvision package like that:
            ``train_dataset=AvalancheDataset(torchvision_dataset)``.
        :param test_dataset: The test dataset. The dataset must be a
            subclass of :class:`AvalancheDataset`. For instance, one can
            use the datasets from the torchvision package like that:
            ``test_dataset=AvalancheDataset(torchvision_dataset)``.
        :param n_experiences: The number of experiences.
        :param task_labels: If True, each experience will have an ascending task
            label. If False, the task label will be 0 for all the experiences.
            Defaults to False.
        :param shuffle: If True, the patterns order will be shuffled. Defaults
            to True.
        :param seed: If shuffle is True and seed is not None, the patterns
            order
            will be shuffled according to the seed. When None, the current
            PyTorch random number generator state will be used.
            Defaults to None.
        :param balance_experiences: If True, patterns of each class will be
            equally spread across all experiences. If False, patterns will be
            assigned to experiences in a completely random way. Defaults to
            False.
        :param min_class_patterns_in_exp: The minimum amount of patterns of
            every class that must be assigned to every experience. Compatible
            with the ``balance_experiences`` parameter. An exception will be
            raised if this constraint can't be satisfied. Defaults to 0.
        :param fixed_exp_assignment: If not None, the pattern assignment
            to use. It must be a list with an entry for each experience. Each
            entry is a list that contains the indexes of patterns belonging to
            that experience. Overrides the ``shuffle``, ``balance_experiences``
            and ``min_class_patterns_in_exp`` parameters.
        :param reproducibility_data: If not None, overrides all the other
            scenario definition options, including ``fixed_exp_assignment``.
            This is usually a dictionary containing data used to
            reproduce a specific experiment. One can use the
            ``get_reproducibility_data`` method to get (and even distribute)
            the experiment setup so that it can be loaded by passing it as this
            parameter. In this way one can be sure that the same specific
            experimental setup is being used (for reproducibility purposes).
            Beware that, in order to reproduce an experiment, the same train and
            test datasets must be used. Defaults to None.
        """

        self._has_task_labels = task_labels

        self.train_exps_patterns_assignment = []

        if reproducibility_data is not None:
            self.train_exps_patterns_assignment = reproducibility_data[
                'exps_patterns_assignment']
            self._has_task_labels = reproducibility_data['has_task_labels']
            n_experiences = len(self.train_exps_patterns_assignment)

        if n_experiences < 1:
            raise ValueError('Invalid number of experiences (n_experiences '
                             'parameter): must be greater than 0')

        if min_class_patterns_in_exp < 0 and reproducibility_data is None:
            raise ValueError('Invalid min_class_patterns_in_exp parameter: '
                             'must be greater than or equal to 0')

        # # Good idea, but doesn't work
        # transform_groups = train_eval_transforms(train_dataset, test_dataset)
        #
        # train_dataset = train_dataset \
        #     .replace_transforms(*transform_groups['train'], group='train') \
        #     .replace_transforms(*transform_groups['eval'], group='eval')
        #
        # test_dataset = test_dataset \
        #     .replace_transforms(*transform_groups['train'], group='train') \
        #     .replace_transforms(*transform_groups['eval'], group='eval')

        unique_targets, unique_count = torch.unique(torch.as_tensor(
            train_dataset.targets),
                                                    return_counts=True)

        self.n_classes: int = len(unique_targets)
        """
        The amount of classes in the original training set.
        """

        self.n_patterns_per_class: List[int] = \
            [0 for _ in range(self.n_classes)]
        """
        The amount of patterns for each class in the original training set.
        """

        if fixed_exp_assignment:
            included_patterns = list()
            for exp_def in fixed_exp_assignment:
                included_patterns.extend(exp_def)
            subset = AvalancheSubset(train_dataset, indices=included_patterns)
            unique_targets, unique_count = torch.unique(torch.as_tensor(
                subset.targets),
                                                        return_counts=True)

        for unique_idx in range(len(unique_targets)):
            class_id = int(unique_targets[unique_idx])
            class_count = int(unique_count[unique_idx])
            self.n_patterns_per_class[class_id] = class_count

        self.n_patterns_per_experience: List[int] = []
        """
        The number of patterns in each experience.
        """

        self.exp_structure: List[List[int]] = []
        """ This field contains, for each training experience, the number of
        instances of each class assigned to that experience. """

        if reproducibility_data or fixed_exp_assignment:
            # fixed_exp_assignment/reproducibility_data is the user
            # provided pattern assignment. All we have to do is populate
            # remaining fields of the class!
            # n_patterns_per_experience is filled later based on exp_structure
            # so we only need to fill exp_structure.

            if reproducibility_data:
                exp_patterns = self.train_exps_patterns_assignment
            else:
                exp_patterns = fixed_exp_assignment
            self.exp_structure = _exp_structure_from_assignment(
                train_dataset, exp_patterns, self.n_classes)
        else:
            # All experiences will contain the same amount of patterns.
            # The amount of patterns doesn't need to be divisible without
            # remainder by the number of experiences, so we distribute
            # remaining patterns across randomly selected experiences (when
            # shuffling) or the first N experiences (when not shuffling).
            # However, we first
            # have to check if the min_class_patterns_in_exp constraint is
            # satisfiable.
            min_class_patterns = min(self.n_patterns_per_class)
            if min_class_patterns < n_experiences * min_class_patterns_in_exp:
                raise ValueError('min_class_patterns_in_exp constraint '
                                 'can\'t be satisfied')

            if seed is not None:
                torch.random.manual_seed(seed)

            # First, get the patterns indexes for each class
            targets_as_tensor = torch.as_tensor(train_dataset.targets)
            classes_to_patterns_idx = [
                torch.nonzero(torch.eq(targets_as_tensor,
                                       class_id)).view(-1).tolist()
                for class_id in range(self.n_classes)
            ]

            if shuffle:
                classes_to_patterns_idx = [
                    torch.as_tensor(cls_patterns)[torch.randperm(
                        len(cls_patterns))].tolist()
                    for cls_patterns in classes_to_patterns_idx
                ]

            # Here we assign patterns to each experience. Two different
            # strategies are required in order to manage the
            # balance_experiences parameter.
            if balance_experiences:
                # If balance_experiences is True we have to make sure that
                # patterns of each class are equally distributed across
                # experiences.
                #
                # To do this, populate self.exp_structure, which will
                # describe how many patterns of each class are assigned to each
                # experience. Then, for each experience, assign the required
                # amount of patterns of each class.
                #
                # We already checked that there are enough patterns for each
                # class to satisfy the min_class_patterns_in_exp param so here
                # we don't need to explicitly enforce that constraint.

                # First, count how many patterns of each class we have to assign
                # to all the experiences (avg). We also get the number of
                # remaining patterns which we'll have to assign in a second
                # pass.
                class_patterns_per_exp = [
                    ((n_class_patterns // n_experiences),
                     (n_class_patterns % n_experiences))
                    for n_class_patterns in self.n_patterns_per_class
                ]
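                # For instance, 103 patterns of a class across 4
                # experiences gives (25, 3): 25 patterns assigned to each
                # experience, plus 3 leftovers distributed one-by-one below.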

                # Remember: exp_structure[exp_id][class_id] is the amount of
                # patterns of class "class_id" in experience "exp_id"
                #
                # This is the easy part: just assign the average amount
                # of class patterns to each experience.
                self.exp_structure = [[
                    class_patterns_this_exp[0]
                    for class_patterns_this_exp in class_patterns_per_exp
                ] for _ in range(n_experiences)]

                # Now we have to distribute the remaining patterns of each class
                #
                # This means that, for each class, we can (randomly) select
                # "n_class_patterns % n_experiences" experiences to assign a
                # single additional pattern of that class.
                for class_id in range(self.n_classes):
                    n_remaining = class_patterns_per_exp[class_id][1]
                    if n_remaining == 0:
                        continue
                    if shuffle:
                        assignment_of_remaining_patterns = torch.randperm(
                            n_experiences).tolist()[:n_remaining]
                    else:
                        assignment_of_remaining_patterns = range(n_remaining)
                    for exp_id in assignment_of_remaining_patterns:
                        self.exp_structure[exp_id][class_id] += 1

                # Following the self.exp_structure definition, assign
                # the actual patterns to each experience.
                #
                # For each experience we assign exactly
                # self.exp_structure[exp_id][class_id] patterns of
                # class "class_id"
                exp_patterns = [[] for _ in range(n_experiences)]
                next_idx_per_class = [0 for _ in range(self.n_classes)]
                for exp_id in range(n_experiences):
                    for class_id in range(self.n_classes):
                        start_idx = next_idx_per_class[class_id]
                        n_patterns = self.exp_structure[exp_id][class_id]
                        end_idx = start_idx + n_patterns
                        exp_patterns[exp_id].extend(
                            classes_to_patterns_idx[class_id]
                            [start_idx:end_idx])
                        next_idx_per_class[class_id] = end_idx
            else:
                # If balance_experiences is False, we just randomly shuffle the
                # patterns indexes and pick N patterns for each experience.
                #
                # However, we have to enforce the min_class_patterns_in_exp
                # constraint, which makes things difficult.
                # In the balance_experiences scenario, that constraint is
                # implicitly enforced by equally distributing class patterns in
                # each experience (we already checked that there are enough
                # overall patterns for each class to satisfy
                # min_class_patterns_in_exp)

                # Here we have to assign the minimum required amount of class
                # patterns to each experience first, then we can move to
                # randomly assign the remaining patterns to each experience.

                # First, initialize exp_patterns and exp_structure
                exp_patterns = [[] for _ in range(n_experiences)]
                self.exp_structure = [[0 for _ in range(self.n_classes)]
                                      for _ in range(n_experiences)]

                # For each experience we assign exactly
                # min_class_patterns_in_exp patterns from each class
                #
                # Very similar to the loop found in the balance_experiences
                # branch! Remember that classes_to_patterns_idx is already
                # shuffled (if required)
                next_idx_per_class = [0 for _ in range(self.n_classes)]
                remaining_patterns = set(range(len(train_dataset)))

                for exp_id in range(n_experiences):
                    for class_id in range(self.n_classes):
                        next_idx = next_idx_per_class[class_id]
                        end_idx = next_idx + min_class_patterns_in_exp
                        selected_patterns = classes_to_patterns_idx[
                            class_id][next_idx:end_idx]
                        exp_patterns[exp_id].extend(selected_patterns)
                        self.exp_structure[exp_id][class_id] += \
                            min_class_patterns_in_exp
                        remaining_patterns.difference_update(selected_patterns)
                        next_idx_per_class[class_id] = end_idx

                remaining_patterns = list(remaining_patterns)

                # We have assigned the required min_class_patterns_in_exp,
                # now we assign the remaining patterns
                #
                # We'll work on remaining_patterns, which contains indexes of
                # patterns not assigned in the previous step.
                if shuffle:
                    patterns_order = torch.as_tensor(remaining_patterns)[
                        torch.randperm(len(remaining_patterns))].tolist()
                else:
                    remaining_patterns.sort()
                    patterns_order = remaining_patterns
                targets_order = [
                    train_dataset.targets[pattern_idx]
                    for pattern_idx in patterns_order
                ]

                avg_exp_size = len(patterns_order) // n_experiences
                n_remaining = len(patterns_order) % n_experiences
                prev_idx = 0
                for exp_id in range(n_experiences):
                    next_idx = prev_idx + avg_exp_size
                    exp_patterns[exp_id].extend(
                        patterns_order[prev_idx:next_idx])
                    cls_ids, cls_counts = torch.unique(torch.as_tensor(
                        targets_order[prev_idx:next_idx]),
                                                       return_counts=True)

                    cls_ids = cls_ids.tolist()
                    cls_counts = cls_counts.tolist()

                    for unique_idx in range(len(cls_ids)):
                        self.exp_structure[exp_id][cls_ids[unique_idx]] += \
                            cls_counts[unique_idx]
                    prev_idx = next_idx

                # Distribute remaining patterns
                if n_remaining > 0:
                    if shuffle:
                        assignment_of_remaining_patterns = torch.randperm(
                            n_experiences).tolist()[:n_remaining]
                    else:
                        assignment_of_remaining_patterns = range(n_remaining)
                    for exp_id in assignment_of_remaining_patterns:
                        pattern_idx = patterns_order[prev_idx]
                        pattern_target = targets_order[prev_idx]
                        exp_patterns[exp_id].append(pattern_idx)

                        self.exp_structure[exp_id][pattern_target] += 1
                        prev_idx += 1

        self.n_patterns_per_experience = [
            len(exp_patterns[exp_id]) for exp_id in range(n_experiences)
        ]

        self._classes_in_exp = None  # Will be lazy initialized later

        train_experiences = []
        train_task_labels = []
        for t_id, exp_def in enumerate(exp_patterns):
            if self._has_task_labels:
                train_task_labels.append(t_id)
            else:
                train_task_labels.append(0)
            task_labels = ConstantSequence(train_task_labels[-1],
                                           len(train_dataset))
            train_experiences.append(
                AvalancheSubset(train_dataset,
                                indices=exp_def,
                                task_labels=task_labels))

        self.train_exps_patterns_assignment = exp_patterns
        """ A list containing which training instances are assigned to each
        experience in the train stream. Instances are identified by their id 
        w.r.t. the dataset found in the original_train_dataset field. """

        super(NIScenario, self).__init__(stream_definitions={
            'train': (train_experiences, train_task_labels, train_dataset),
            'test': (test_dataset, [0], test_dataset)
        },
                                         complete_test_set_only=True,
                                         experience_factory=NIExperience)
Example #25
def split_detection_benchmark(n_experiences: int,
                              train_dataset,
                              test_dataset,
                              n_classes: int,
                              train_transform=None,
                              eval_transform=None,
                              shuffle=True):
    """
    Creates an example object detection/segmentation benchmark.

    This is a generator for toy benchmarks and should be used only to
    show how a detection benchmark can be created. It was not meant to be
    used for research purposes!

    :param n_experiences: The number of train experiences to create.
    :param train_dataset: The training dataset.
    :param test_dataset: The test dataset.
    :param n_classes: The number of categories (excluding the background).
    :param train_transform: The train transformation.
    :param eval_transform: The eval transformation.
    :param shuffle: If True, the dataset will be split randomly. Defaults
        to True.
    :return: A :class:`DetectionScenario` instance.
    """

    transform_groups = dict(
        train=(train_transform, None),
        eval=(eval_transform, None),
    )

    exp_n_imgs = len(train_dataset) // n_experiences
    remaining = len(train_dataset) % n_experiences

    train_dataset_avl = AvalancheDataset(train_dataset,
                                         transform_groups=transform_groups,
                                         initial_transform_group='train')
    test_dataset_avl = AvalancheDataset(test_dataset,
                                        transform_groups=transform_groups,
                                        initial_transform_group='eval')

    exp_sz = [exp_n_imgs for _ in range(n_experiences)]
    for exp_id in range(n_experiences):
        if remaining == 0:
            break

        exp_sz[exp_id] += 1
        remaining -= 1

    train_indices = [i for i in range(len(train_dataset_avl))]
    if shuffle:
        train_indices = torch.as_tensor(train_indices)[torch.randperm(
            len(train_indices))].tolist()

    train_exps_datasets = []
    last_slice_idx = 0
    for exp_id in range(n_experiences):
        n_imgs = exp_sz[exp_id]
        idx_range = train_indices[last_slice_idx:last_slice_idx + n_imgs]
        train_exps_datasets.append(
            AvalancheSubset(train_dataset_avl, indices=idx_range))
        last_slice_idx += n_imgs

    train_def = StreamUserDef(
        exps_data=train_exps_datasets,
        exps_task_labels=[0 for _ in range(len(train_exps_datasets))],
        origin_dataset=train_dataset,
        is_lazy=False)

    test_def = StreamUserDef(exps_data=[test_dataset_avl],
                             exps_task_labels=[0],
                             origin_dataset=test_dataset,
                             is_lazy=False)

    return DetectionCLScenario(n_classes=n_classes,
                               stream_definitions={
                                   'train': train_def,
                                   'test': test_def
                               },
                               complete_test_set_only=True)
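
# A minimal usage sketch (hypothetical dataset objects; any map-style
# detection dataset returning (image, target) pairs should work here):
benchmark = split_detection_benchmark(
    n_experiences=5,
    train_dataset=my_train_detection_set,
    test_dataset=my_test_detection_set,
    n_classes=20,
    shuffle=True)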
Example #26
    def __init__(self,
                 train_dataset: AvalancheDataset,
                 test_dataset: AvalancheDataset,
                 n_experiences: int,
                 task_labels: bool,
                 shuffle: bool = True,
                 seed: Optional[int] = None,
                 fixed_class_order: Optional[Sequence[int]] = None,
                 per_experience_classes: Optional[Dict[int, int]] = None,
                 class_ids_from_zero_from_first_exp: bool = False,
                 class_ids_from_zero_in_each_exp: bool = False,
                 reproducibility_data: Optional[Dict[str, Any]] = None):
        """
        Creates a ``NCGenericScenario`` instance given the training and test
        Datasets and the number of experiences.

        By default, the number of classes will be automatically detected by
        looking at the training Dataset ``targets`` field. Classes will be
        uniformly distributed across ``n_experiences`` unless a
        ``per_experience_classes`` argument is specified.

        The number of classes must be divisible without remainder by the number
        of experiences. This also applies when the ``per_experience_classes``
        argument is not None.

        :param train_dataset: The training dataset. The dataset must be a
            subclass of :class:`AvalancheDataset`. For instance, one can
            use the datasets from the torchvision package like that:
            ``train_dataset=AvalancheDataset(torchvision_dataset)``.
        :param test_dataset: The test dataset. The dataset must be a
            subclass of :class:`AvalancheDataset`. For instance, one can
            use the datasets from the torchvision package like that:
            ``test_dataset=AvalancheDataset(torchvision_dataset)``.
        :param n_experiences: The number of experiences.
        :param task_labels: If True, each experience will have an ascending task
            label. If False, the task label will be 0 for all the experiences.
        :param shuffle: If True, the class order will be shuffled. Defaults to
            True.
        :param seed: If shuffle is True and seed is not None, the class order
            will be shuffled according to the seed. When None, the current
            PyTorch random number generator state will be used.
            Defaults to None.
        :param fixed_class_order: If not None, the class order to use (overrides
            the shuffle argument). Very useful for enhancing
            reproducibility. Defaults to None.
        :param per_experience_classes: If not None, a dictionary whose keys are
            (0-indexed) experience IDs and their values are the number of
            classes to include in the respective experiences. The dictionary
            doesn't have to contain a key for each experience! All the remaining
            experiences will contain an equal amount of the remaining classes.
            The remaining number of classes must be divisible without remainder
            by the remaining number of experiences. For instance,
            if you want to include 50 classes in the first experience
            while equally distributing remaining classes across remaining
            experiences, just pass the "{0: 50}" dictionary as the
            per_experience_classes parameter. Defaults to None.
        :param class_ids_from_zero_from_first_exp: If True, original class IDs
            will be remapped so that they will appear as having an ascending
            order. For instance, if the resulting class order after shuffling
            (or defined by fixed_class_order) is [23, 34, 11, 7, 6, ...] and
            class_ids_from_zero_from_first_exp is True, then all the patterns
            belonging to class 23 will appear as belonging to class "0",
            class "34" will be mapped to "1", class "11" to "2" and so on.
            This is very useful when drawing confusion matrices and when dealing
            with algorithms with dynamic head expansion. Defaults to False.
            Mutually exclusive with the ``class_ids_from_zero_in_each_exp``
            parameter.
        :param class_ids_from_zero_in_each_exp: If True, original class IDs
            will be mapped to range [0, n_classes_in_exp) for each experience.
            Defaults to False. Mutually exclusive with the
            ``class_ids_from_zero_from_first_exp`` parameter.
        :param reproducibility_data: If not None, overrides all the other
            scenario definition options. This is usually a dictionary containing
            data used to reproduce a specific experiment. One can use the
            ``get_reproducibility_data`` method to get (and even distribute)
            the experiment setup so that it can be loaded by passing it as this
            parameter. In this way one can be sure that the same specific
            experimental setup is being used (for reproducibility purposes).
            Beware that, in order to reproduce an experiment, the same train and
            test datasets must be used. Defaults to None.
        """
        if class_ids_from_zero_from_first_exp and \
                class_ids_from_zero_in_each_exp:
            raise ValueError('Invalid mutually exclusive options '
                             'class_ids_from_zero_from_first_exp and '
                             'class_ids_from_zero_in_each_exp set at the '
                             'same time')
        if reproducibility_data:
            n_experiences = reproducibility_data['n_experiences']

        if n_experiences < 1:
            raise ValueError('Invalid number of experiences (n_experiences '
                             'parameter): must be greater than 0')

        self.classes_order: List[int] = []
        """ Stores the class order (remapped class IDs). """

        self.classes_order_original_ids: List[int] = torch.unique(
            torch.as_tensor(train_dataset.targets), sorted=True).tolist()
        """ Stores the class order (original class IDs) """

        n_original_classes = max(self.classes_order_original_ids) + 1

        self.class_mapping: List[int] = []
        """
        class_mapping stores the class mapping so that 
        `mapped_class_id = class_mapping[original_class_id]`. 
        
        If the scenario is created with an amount of classes which is less than
        the amount of all classes in the dataset, then class_mapping will 
        contain some -1 values corresponding to ignored classes. This can
        happen when passing a fixed class order to the constructor.
        """

        self.n_classes_per_exp: List[int] = []
        """ A list that, for each experience (identified by its index/ID),
            stores the number of classes assigned to that experience. """

        self._classes_in_exp: List[Set[int]] = []

        self.original_classes_in_exp: List[Set[int]] = []
        """
        A list that, for each experience (identified by its index/ID), stores a 
        set of the original IDs of classes assigned to that experience. 
        This field applies to both train and test streams.
        """

        self.class_ids_from_zero_from_first_exp: bool = \
            class_ids_from_zero_from_first_exp
        """ If True the class IDs have been remapped to start from zero. """

        self.class_ids_from_zero_in_each_exp: bool = \
            class_ids_from_zero_in_each_exp
        """ If True the class IDs have been remapped to start from zero in 
        each experience """

        # Note: if fixed_class_order is None and shuffle is False,
        # the class order will be the one encountered by looking at
        # the train_dataset targets field.
        if reproducibility_data:
            self.classes_order_original_ids = \
                reproducibility_data['classes_order_original_ids']
            self.class_ids_from_zero_from_first_exp = \
                reproducibility_data['class_ids_from_zero_from_first_exp']
            self.class_ids_from_zero_in_each_exp = \
                reproducibility_data['class_ids_from_zero_in_each_exp']
        elif fixed_class_order is not None:
            # User defined class order -> just use it
            if len(set(self.classes_order_original_ids).union(
                    set(fixed_class_order))) != \
                    len(self.classes_order_original_ids):
                raise ValueError(
                    'Invalid classes defined in fixed_class_order')

            self.classes_order_original_ids = list(fixed_class_order)
        elif shuffle:
            # No user defined class order.
            # If a seed is defined, set the random number generator seed.
            # If no seed has been defined, use the actual
            # random number generator state.
            # Finally, shuffle the class list to obtain a random classes
            # order
            if seed is not None:
                torch.random.manual_seed(seed)
            self.classes_order_original_ids = \
                torch.as_tensor(self.classes_order_original_ids)[
                    torch.randperm(len(self.classes_order_original_ids))
                ].tolist()

        self.n_classes: int = len(self.classes_order_original_ids)
        """ The number of classes """

        if reproducibility_data:
            self.n_classes_per_exp = \
                reproducibility_data['n_classes_per_exp']
        elif per_experience_classes is not None:
            # per_experience_classes is a user-defined dictionary that defines
            # the number of classes to include in some (or all) experiences.
            # Remaining classes are equally distributed across the other
            # experiences.
            #
            # Format of per_experience_classes dictionary:
            #   - key = experience id
            #   - value = number of classes for this experience

            if max(per_experience_classes.keys()) >= n_experiences or min(
                    per_experience_classes.keys()) < 0:
                # The dictionary contains a key (that is, an experience id) >=
                # the number of requested experiences... or < 0
                raise ValueError(
                    'Invalid experience id in per_experience_classes parameter:'
                    ' experience ids must be in range [0, n_experiences)')
            if min(per_experience_classes.values()) < 0:
                # One or more values (number of classes for each experience) < 0
                raise ValueError('Wrong number of classes defined for one or '
                                 'more experiences: must be a non-negative '
                                 'value')

            if sum(per_experience_classes.values()) > self.n_classes:
                # The sum of the dictionary values (n. of classes for each
                # experience) exceeds the number of available classes
                raise ValueError('Insufficient number of classes: '
                                 'per_experience_classes parameter can\'t '
                                 'be satisfied')

            # Remaining classes are equally distributed across remaining
            # experiences. This amount of classes must be divisible without
            # remainder by the number of remaining experiences
            remaining_exps = n_experiences - len(per_experience_classes)
            if remaining_exps > 0 and (self.n_classes - sum(
                    per_experience_classes.values())) % remaining_exps > 0:
                raise ValueError('Invalid number of experiences: remaining '
                                 'classes cannot be equally divided across '
                                 'the remaining experiences')

            # default_per_exp_classes is the default amount of classes
            # for the remaining experiences
            if remaining_exps > 0:
                default_per_exp_classes = (self.n_classes - sum(
                    per_experience_classes.values())) // remaining_exps
            else:
                default_per_exp_classes = 0

            # Initialize the self.n_classes_per_exp list using
            # "default_per_exp_classes" as the default
            # amount of classes per experience. Then, loop through the
            # per_experience_classes dictionary to set the customized,
            # user defined, classes for the required experiences.
            self.n_classes_per_exp = \
                [default_per_exp_classes] * n_experiences
            for exp_id in per_experience_classes:
                self.n_classes_per_exp[exp_id] = per_experience_classes[exp_id]
        else:
            # Classes will be equally distributed across the experiences.
            # The amount of classes must be divisible without remainder
            # by the number of experiences
            if self.n_classes % n_experiences > 0:
                raise ValueError(
                    'Invalid number of experiences: classes contained in '
                    'dataset cannot be divided by n_experiences')
            self.n_classes_per_exp = \
                [self.n_classes // n_experiences] * n_experiences

        # Before populating the classes_in_experience list,
        # define the remapped class IDs.
        if reproducibility_data:
            # Method 0: use reproducibility data
            self.classes_order = reproducibility_data['classes_order']
            self.class_mapping = reproducibility_data['class_mapping']
        elif self.class_ids_from_zero_from_first_exp:
            # Method 1: remap class IDs so that they appear in ascending order
            # over all experiences
            self.classes_order = list(range(0, self.n_classes))
            self.class_mapping = [-1] * n_original_classes
            for class_id in range(n_original_classes):
                # This check is needed because, when a fixed class order is
                # used, the user may have selected fewer classes than the
                # overall number of classes in the dataset.
                if class_id in self.classes_order_original_ids:
                    self.class_mapping[class_id] = \
                        self.classes_order_original_ids.index(class_id)
        elif self.class_ids_from_zero_in_each_exp:
            # Method 2: remap class IDs so that they appear in range
            # [0, n_classes_in_exp) in each experience
            self.classes_order = []
            self.class_mapping = [-1] * n_original_classes
            next_class_idx = 0
            for exp_id, exp_n_classes in enumerate(self.n_classes_per_exp):
                self.classes_order += list(range(exp_n_classes))
                for exp_class_idx in range(exp_n_classes):
                    original_class_position = next_class_idx + exp_class_idx
                    original_class_id = self.classes_order_original_ids[
                        original_class_position]
                    self.class_mapping[original_class_id] = exp_class_idx
                next_class_idx += exp_n_classes
        else:
            # Method 3: no remapping of any kind
            # remapped_id = class_mapping[class_id] -> class_id == remapped_id
            self.classes_order = self.classes_order_original_ids
            self.class_mapping = list(range(0, n_original_classes))
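        # A hypothetical example with 4 original classes,
        # classes_order_original_ids == [2, 0, 3, 1] and 2 classes per
        # experience:
        # - Method 1: class_mapping == [1, 3, 0, 2]
        #   (remapped IDs follow the order in which classes are encountered)
        # - Method 2: class_mapping == [1, 1, 0, 0]
        #   (the first class of each experience maps to 0, the second to 1)
        # - Method 3: class_mapping == [0, 1, 2, 3] (identity)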

        original_training_dataset = train_dataset
        original_test_dataset = test_dataset

        # Populate the _classes_in_exp and original_classes_in_exp lists
        # "_classes_in_exp[exp_id]": list of (remapped) class IDs assigned
        # to experience "exp_id"
        # "original_classes_in_exp[exp_id]": list of original class IDs
        # assigned to experience "exp_id"
        for exp_id in range(n_experiences):
            classes_start_idx = sum(self.n_classes_per_exp[:exp_id])
            classes_end_idx = classes_start_idx + self.n_classes_per_exp[exp_id]

            self._classes_in_exp.append(
                set(self.classes_order[classes_start_idx:classes_end_idx]))
            self.original_classes_in_exp.append(
                set(self.classes_order_original_ids[
                    classes_start_idx:classes_end_idx]))

        # Finally, create the experience -> patterns assignment.
        # In order to do this, we don't load the patterns;
        # instead, we use the targets field.
        train_exps_patterns_assignment = []
        test_exps_patterns_assignment = []

        self._has_task_labels = task_labels
        if reproducibility_data is not None:
            self._has_task_labels = bool(
                reproducibility_data['has_task_labels'])

        if self._has_task_labels:
            pattern_train_task_labels = [-1] * len(train_dataset)
            pattern_test_task_labels = [-1] * len(test_dataset)
        else:
            pattern_train_task_labels = ConstantSequence(0, len(train_dataset))
            pattern_test_task_labels = ConstantSequence(0, len(test_dataset))
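        # Note: ConstantSequence(value, n) behaves like the list [value] * n,
        # exposing the same sequence interface without allocating n elements.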

        for exp_id in range(n_experiences):
            selected_classes = self.original_classes_in_exp[exp_id]
            selected_indexes_train = []
            for idx, element in enumerate(original_training_dataset.targets):
                if element in selected_classes:
                    selected_indexes_train.append(idx)
                    if self._has_task_labels:
                        pattern_train_task_labels[idx] = exp_id

            selected_indexes_test = []
            for idx, element in enumerate(original_test_dataset.targets):
                if element in selected_classes:
                    selected_indexes_test.append(idx)
                    if self._has_task_labels:
                        pattern_test_task_labels[idx] = exp_id

            train_exps_patterns_assignment.append(selected_indexes_train)
            test_exps_patterns_assignment.append(selected_indexes_test)

        # Note: replacing the transform groups of both datasets (as sketched
        # below) would be a good idea, but it doesn't work:
        # transform_groups = train_eval_transforms(train_dataset, test_dataset)
        #
        # train_dataset = train_dataset\
        #     .replace_transforms(*transform_groups['train'], group='train') \
        #     .replace_transforms(*transform_groups['eval'], group='eval')
        #
        # test_dataset = test_dataset \
        #     .replace_transforms(*transform_groups['train'], group='train') \
        #     .replace_transforms(*transform_groups['eval'], group='eval')

        train_dataset = AvalancheSubset(train_dataset,
                                        class_mapping=self.class_mapping,
                                        initial_transform_group='train')
        test_dataset = AvalancheSubset(test_dataset,
                                       class_mapping=self.class_mapping,
                                       initial_transform_group='eval')

        self.train_exps_patterns_assignment = train_exps_patterns_assignment
        """ A list containing which training instances are assigned to each
        experience in the train stream. Instances are identified by their id 
        w.r.t. the dataset found in the original_train_dataset field. """

        self.test_exps_patterns_assignment = test_exps_patterns_assignment
        """ A list containing which test instances are assigned to each
        experience in the test stream. Instances are identified by their id 
        w.r.t. the dataset found in the original_test_dataset field. """

        train_experiences = []
        train_task_labels = []
        for t_id, exp_def in enumerate(train_exps_patterns_assignment):
            if self._has_task_labels:
                train_task_labels.append(t_id)
            else:
                train_task_labels.append(0)
            task_labels = ConstantSequence(train_task_labels[-1],
                                           len(train_dataset))
            train_experiences.append(
                AvalancheSubset(train_dataset,
                                indices=exp_def,
                                task_labels=task_labels))

        test_experiences = []
        test_task_labels = []
        for t_id, exp_def in enumerate(test_exps_patterns_assignment):
            if self._has_task_labels:
                test_task_labels.append(t_id)
            else:
                test_task_labels.append(0)
            task_labels = ConstantSequence(test_task_labels[-1],
                                           len(test_dataset))
            test_experiences.append(
                AvalancheSubset(test_dataset,
                                indices=exp_def,
                                task_labels=task_labels))

        super(NCScenario, self).__init__(
            stream_definitions={
                'train': (train_experiences, train_task_labels, train_dataset),
                'test': (test_experiences, test_task_labels, test_dataset)
            },
            experience_factory=NCExperience)
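# A minimal usage sketch for the scenario defined above (hedged: it assumes
# the `nc_benchmark` helper and the MNIST datasets used by the surrounding
# examples; `per_exp_classes` mirrors the per_experience_classes parameter):
#
#     benchmark = nc_benchmark(mnist_train, mnist_test, n_experiences=4,
#                              task_labels=False, shuffle=True, seed=1234,
#                              per_exp_classes={0: 4})
#     for experience in benchmark.train_stream:
#         print(experience.classes_in_this_experience)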
Example #27
from typing import Iterable, Tuple
from numpy import array, where
from avalanche.benchmarks.utils import AvalancheDataset, AvalancheSubset

def make_per_class_subset(
        dataset: AvalancheDataset) -> Iterable[Tuple[int, AvalancheSubset]]:
    """Yield one (class_id, subset) pair for each class in the dataset."""
    class_ids = array(dataset.targets)
    for class_id in set(class_ids):
        idx = where(class_ids == class_id)[0]
        yield class_id, AvalancheSubset(dataset, idx)
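# A brief usage sketch (hedged: `my_dataset` stands for any AvalancheDataset):
#
#     for class_id, subset in make_per_class_subset(my_dataset):
#         print(class_id, len(subset))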
Example #28
    def cutoff_memory(self, cutoff_per_exp: Dict[int, int]):
        # No need to reselect at this point: we expect the first selection
        # to have sorted the exemplars, so the first `cutoff` entries are
        # the best ones.
        for exp, cutoff in cutoff_per_exp.items():
            self.ext_mem[exp] = AvalancheSubset(self.ext_mem[exp],
                                                list(range(cutoff)))
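# A short usage sketch (hedged: `replay_plugin` is a placeholder instance
# whose ext_mem maps experience ids to exemplar sets; sizes are illustrative):
#
#     # Keep the 20 best exemplars of experience 0 and the 10 best of
#     # experience 1.
#     replay_plugin.cutoff_memory({0: 20, 1: 10})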
Example #29
    def test_mt_multi_dataset_one_task_per_set(self):
        split_mapping = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6]
        mnist_train = MNIST(root=expanduser("~") + "/.avalanche/data/mnist/",
                            train=True,
                            download=True)
        mnist_test = MNIST(root=expanduser("~") + "/.avalanche/data/mnist/",
                           train=False,
                           download=True)

        train_part1 = make_nc_transformation_subset(mnist_train, None, None,
                                                    range(3))
        train_part2 = make_nc_transformation_subset(mnist_train, None, None,
                                                    range(3, 10))
        train_part2 = AvalancheSubset(train_part2, class_mapping=split_mapping)

        test_part1 = make_nc_transformation_subset(mnist_test, None, None,
                                                   range(3))
        test_part2 = make_nc_transformation_subset(mnist_test, None, None,
                                                   range(3, 10))
        test_part2 = AvalancheSubset(test_part2, class_mapping=split_mapping)
        my_nc_benchmark = nc_benchmark([train_part1, train_part2],
                                       [test_part1, test_part2],
                                       2,
                                       task_labels=True,
                                       seed=1234,
                                       class_ids_from_zero_in_each_exp=True,
                                       one_dataset_per_exp=True)

        self.assertEqual(2, my_nc_benchmark.n_experiences)
        self.assertEqual(10, my_nc_benchmark.n_classes)
        self.assertEqual(2, len(my_nc_benchmark.train_stream))
        self.assertEqual(2, len(my_nc_benchmark.test_stream))

        exp_classes_train = []
        exp_classes_test = []

        all_classes_train = set()
        all_classes_test = set()

        task_info: NCExperience
        for task_id, task_info in enumerate(my_nc_benchmark.train_stream):
            self.assertLessEqual(task_id, 1)
            all_classes_train.update(
                my_nc_benchmark.classes_in_experience['train'][task_id])
            exp_classes_train.append(task_info.classes_in_this_experience)
        self.assertEqual(7, len(all_classes_train))

        for task_id, task_info in enumerate(my_nc_benchmark.test_stream):
            self.assertLessEqual(task_id, 1)
            all_classes_test.update(
                my_nc_benchmark.classes_in_experience['test'][task_id])
            exp_classes_test.append(task_info.classes_in_this_experience)
        self.assertEqual(7, len(all_classes_test))

        self.assertTrue(
            (my_nc_benchmark.classes_in_experience['train'][0] == {0, 1, 2}
             and my_nc_benchmark.classes_in_experience['train'][1] == set(
                 range(0, 7))) or
            (my_nc_benchmark.classes_in_experience['train'][0] == set(
                range(0, 7)) and
             my_nc_benchmark.classes_in_experience['train'][1] == {0, 1, 2}))

        exp_classes_ref1 = [list(range(3)), list(range(7))]
        exp_classes_ref2 = [list(range(7)), list(range(3))]

        self.assertTrue(exp_classes_train == exp_classes_ref1
                        or exp_classes_train == exp_classes_ref2)

        if exp_classes_train == exp_classes_ref1:
            self.assertTrue(exp_classes_test == exp_classes_ref1)
        else:
            self.assertTrue(exp_classes_test == exp_classes_ref2)
Example #30
    def select(self, dataset: AvalancheSubset, model: Module,
               m: int) -> AvalancheSubset:
        # Random exemplar selection: sample m indices uniformly, without
        # replacement (`choice` here is numpy.random.choice).
        return AvalancheSubset(dataset,
                               choice(len(dataset), m, replace=False))
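# A brief usage sketch (hedged: `selector`, `buffer_subset` and `net` are
# placeholders):
#
#     exemplars = selector.select(buffer_subset, net, m=20)
#     assert len(exemplars) == 20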