def test_classes_in_this_experience(self):
    """Check ``classes_in_this_experience`` on every stream of a benchmark.

    Datasets are built with labels drawn from known ranges; the classes
    reported by each experience of the train, test and custom ("other")
    streams must fall inside the range used for the matching dataset.
    """

    def random_dataset(label_low, label_high):
        # Tensors are drawn in the order patterns, labels, task labels.
        patterns = torch.rand(200, 3, 28, 28)
        labels = torch.randint(label_low, label_high, (200, ))
        tasks = torch.randint(0, 5, (200, ))
        return AvalancheTensorDataset(patterns, labels, task_labels=tasks)

    def assert_classes_within(experience, low, high):
        # Reported classes must lie in the half-open interval [low, high).
        classes = experience.classes_in_this_experience
        self.assertGreaterEqual(min(classes), low)
        self.assertLess(max(classes), high)

    train_exps = [random_dataset(0, 70), random_dataset(0, 100)]
    test_exps = [random_dataset(100, 200)]
    other_stream_exps = [random_dataset(400, 600)]

    benchmark_instance = dataset_benchmark(
        train_datasets=train_exps,
        test_datasets=test_exps,
        other_streams_datasets={'other': other_stream_exps})

    # Train stream
    train_exp_0: GenericExperience = benchmark_instance.train_stream[0]
    train_exp_1: GenericExperience = benchmark_instance.train_stream[1]
    assert_classes_within(train_exp_0, 0, 70)
    assert_classes_within(train_exp_1, 0, 100)

    # Test stream
    test_exp_0: GenericExperience = benchmark_instance.test_stream[0]
    assert_classes_within(test_exp_0, 100, 200)

    # Custom stream
    other_exp_0: GenericExperience = benchmark_instance.other_stream[0]
    assert_classes_within(other_exp_0, 400, 600)
def _make_tensor_datasets(self):
    """Create random classification datasets for three streams.

    :returns: A ``(train_exps, test_exps, other_stream_exps)`` tuple of
        lists holding, respectively, two training datasets (labels in
        [0, 70) and [0, 100)), one test dataset (labels in [100, 200)) and
        one custom-stream dataset (labels in [400, 600)).
    """

    def build(label_low, label_high):
        # Tensors are drawn in the order patterns, labels, task labels.
        patterns = torch.rand(200, 3, 28, 28)
        labels = torch.randint(label_low, label_high, (200,))
        tasks = torch.randint(0, 5, (200,))
        return AvalancheTensorDataset(
            patterns, labels,
            task_labels=tasks,
            dataset_type=AvalancheDatasetType.CLASSIFICATION)

    train_exps = [build(low, high) for low, high in ((0, 70), (0, 100))]
    test_exps = [build(100, 200)]
    other_stream_exps = [build(400, 600)]
    return train_exps, test_exps, other_stream_exps
def test(self):
    """Compare the exemplar orderings of the herding and the
    closest-to-center selection strategies on a three-sample dataset."""
    # Given
    model = AbsModel()
    herding = HerdingSelectionStrategy(model, "features")
    closest_to_center = ClosestToCenterSelectionStrategy(model, "features")

    # When
    # Features are [[0], [4], [5]]
    # Center is [3]
    samples = tensor([0, -4, 5]).float()
    dataset = AvalancheTensorDataset(
        samples, zeros(3),
        dataset_type=AvalancheDatasetType.CLASSIFICATION)
    strategy = MagicMock(device="cpu", eval_mb_size=8)

    # Then
    # Herding:
    # 1. First pass: -4 (index 1) is picked because its feature ([4]) is
    #    the closest to the center.
    # 2. Second pass: 0 (index 0) is picked because the resulting center
    #    ([2]) is closer to [3] than the one obtained by picking 5 ([4.5]).
    # 3. The last remaining exemplar is picked.
    herding_order = herding.make_sorted_indices(strategy, dataset)
    self.assertSequenceEqual([1, 0, 2], herding_order)

    # Closest to center:
    # -4 (index 1) is the closest to the center in feature space, then
    # 5 (index 2) is closer than 0 (index 0).
    center_order = closest_to_center.make_sorted_indices(strategy, dataset)
    self.assertSequenceEqual([1, 2, 0], center_order)
def after_train_dataset_adaptation(self, strategy: 'BaseStrategy',
                                   **kwargs):
    """Append the stored memory samples to the strategy's adapted dataset.

    Nothing is done while ``strategy.training_exp_counter`` is zero.
    Otherwise the contents of ``self.x_memory`` and ``self.y_memory`` are
    wrapped in a tensor dataset (using ``self.buffer_transform`` as the
    pattern transform) and concatenated after the current
    ``strategy.adapted_dataset``.

    :param strategy: The strategy whose ``adapted_dataset`` is extended.
    :param kwargs: Ignored.
    """
    if strategy.training_exp_counter == 0:
        return

    stored_x = torch.cat(self.x_memory).cpu()
    # y_memory holds one iterable of targets per stored chunk: flatten it.
    stored_y = list(itertools.chain.from_iterable(self.y_memory))
    memory = AvalancheTensorDataset(
        stored_x,
        stored_y,
        transform=self.buffer_transform,
        target_transform=None)

    strategy.adapted_dataset = AvalancheConcatDataset(
        (strategy.adapted_dataset, memory))
def test_complete_test_set_only(self):
    """Check ``complete_test_set_only=True`` on ``GenericCLScenario``.

    A test stream defined with three datasets must be rejected, while a
    test stream defined with a single dataset must be accepted and yield a
    one-experience test stream.
    """

    def random_datasets(how_many, n_patterns):
        # Tensors are drawn in the order patterns, labels, task labels.
        datasets = []
        for _ in range(how_many):
            patterns = torch.rand(n_patterns, 3, 28, 28)
            labels = torch.randint(0, 100, (n_patterns, ))
            tasks = torch.randint(0, 5, (n_patterns, ))
            datasets.append(
                AvalancheTensorDataset(patterns, labels, task_labels=tasks))
        return datasets

    train_exps = random_datasets(5, 200)
    test_exps = random_datasets(3, 150)

    # Several test datasets are not allowed with complete_test_set_only.
    with self.assertRaises(Exception):
        GenericCLScenario(
            stream_definitions={
                "train": (train_exps, ),
                "test": (test_exps, ),
            },
            complete_test_set_only=True,
        )

    # A single test dataset is fine.
    benchmark_instance = GenericCLScenario(
        stream_definitions={
            "train": (train_exps, ),
            "test": (test_exps[0], ),
        },
        complete_test_set_only=True,
    )

    self.assertEqual(5, len(benchmark_instance.train_stream))
    self.assertEqual(1, len(benchmark_instance.test_stream))
def observe_exemplars(self, class2exemplars: Dict[int, List[int]],
                      selection_order: List[int]):
    """Run ``self.policy`` on a dataset built from ``class2exemplars``.

    The policy's selection strategy is first replaced with a
    ``FixedSelectionStrategy`` following ``selection_order``. The policy is
    then called with a mocked strategy whose experience exposes the
    dataset.

    :param class2exemplars: Maps each class id to the exemplar values that
        become the patterns labelled with that class.
    :param selection_order: Order handed to ``FixedSelectionStrategy``.
    """
    self.policy.selection_strategy = FixedSelectionStrategy(selection_order)

    # Flatten the mapping into parallel pattern / label lists.
    flat_exemplars = []
    flat_labels = []
    for class_id, exemplars in class2exemplars.items():
        flat_exemplars.extend(exemplars)
        flat_labels.extend(class_id for _ in exemplars)

    x = tensor(flat_exemplars)
    y = tensor(flat_labels).long()

    dataset = AvalancheTensorDataset(
        x, y, dataset_type=AvalancheDatasetType.CLASSIFICATION)
    mocked_experience = MagicMock(dataset=dataset)
    self.policy(MagicMock(experience=mocked_experience))
def after_train_dataset_adaptation(self, strategy: "SupervisedTemplate",
                                   **kwargs):
    """Append the stored memory samples to the strategy's adapted dataset.

    Nothing is done while ``strategy.clock.train_exp_counter`` is zero.
    Otherwise the contents of ``self.x_memory`` and ``self.y_memory`` are
    wrapped in a tensor dataset (using ``self.buffer_transform`` as the
    pattern transform) and concatenated after the current
    ``strategy.adapted_dataset``.

    :param strategy: The strategy whose ``adapted_dataset`` is extended.
    :param kwargs: Ignored.
    """
    if strategy.clock.train_exp_counter == 0:
        return

    stored_x = torch.cat(self.x_memory).cpu()
    # y_memory holds one iterable of targets per stored chunk: flatten it.
    stored_y = list(itertools.chain.from_iterable(self.y_memory))
    memory = AvalancheTensorDataset(
        stored_x,
        stored_y,
        transform=self.buffer_transform,
        target_transform=None,
    )

    combined = (strategy.adapted_dataset, memory)
    strategy.adapted_dataset = AvalancheConcatDataset(combined)
def test_tensor_samples(args):
    """Feed ``ImagesSamplePlugin`` a tensor dataset made from one MNIST
    minibatch and read the image of the first metric value it returns.

    :param args: Unused.
    """
    p_metric = ImagesSamplePlugin(
        n_cols=5, n_rows=5, group=True, mode="train")

    scenario = SplitMNIST(5)
    curr_exp = scenario.train_stream[0]

    # Only the first minibatch of the experience is needed.
    loader = DataLoader(curr_exp.dataset, batch_size=32)
    for mb in loader:
        break

    # The first two minibatch elements become the dataset tensors; the
    # second one is also used as the targets.
    curr_dataset = AvalancheTensorDataset(*mb[:2], targets=mb[1])

    strategy_mock = MagicMock(
        eval_mb_size=32,
        experience=curr_exp,
        adapted_dataset=curr_dataset,
    )

    mval = p_metric.after_train_dataset_adaptation(strategy_mock)
    img_grid = mval[0].value.image
def create_generic_scenario_from_tensor_lists(
        train_tensors: Sequence[Sequence[Any]],
        test_tensors: Sequence[Sequence[Any]],
        task_labels: Sequence[int],
        *,
        complete_test_set_only: bool = False,
        train_transform=None,
        train_target_transform=None,
        eval_transform=None,
        eval_target_transform=None,
        dataset_type: AvalancheDatasetType = None) -> GenericCLScenario:
    """
    This helper function is DEPRECATED in favor of
    `create_generic_benchmark_from_tensor_lists`.

    Creates a generic scenario given lists of Tensors. A separate dataset
    will be created from each Tensor tuple (x, y, z, ...) and each of those
    training datasets will be considered a separate training experience.
    Using this helper function is the lowest-level way to create a Continual
    Learning scenario. When possible, consider using higher level helpers.

    Experiences are defined by passing lists of tensors as the
    `train_tensors` and `test_tensors` parameters. Those parameters must be
    lists containing sub-lists of tensors, one for each experience. Each
    tensor defines the value of a feature ("x", "y", "z", ...) for all
    patterns of that experience.

    By default the second tensor of each experience will be used to fill the
    `targets` value (label of each pattern).

    In its base form, the test lists must contain the same amount of
    elements of the training lists. Those pairs of datasets are then used to
    create the "past", "cumulative" (a.k.a. growing) and "future" test sets.
    However, in certain Continual Learning scenarios only the concept of
    "complete" test set makes sense. In that case, the
    ``complete_test_set_only`` should be set to True (see the parameter
    description for more info).

    :param train_tensors: A list of lists. The first list must contain the
        tensors for the first training experience (one tensor per feature),
        the second list must contain the tensors for the second training
        experience, and so on.
    :param test_tensors: A list of lists. The first list must contain the
        tensors for the first test experience (one tensor per feature), the
        second list must contain the tensors for the second test experience,
        and so on. When using `complete_test_set_only`, this parameter must
        be a list containing a single sub-list for the single test
        experience.
    :param task_labels: A list of task labels. Must contain a task label for
        each experience. For Single-Incremental-Task (a.k.a. Task-Free)
        scenarios, this is usually a list of zeros. For Multi Task scenario,
        this is usually a list of ascending task labels (starting from 0).
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the scenario. This means that ``test_tensors`` must
        define a single experience. Defaults to False, which means that
        ``train_tensors`` and ``test_tensors`` must define the same amount
        of experiences.
    :param train_transform: The transformation to apply to the training
        data, e.g. a random crop, a normalization or a concatenation of
        different transformations (see torchvision.transform documentation
        for a comprehensive list of possible transformations). Defaults to
        None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param dataset_type: The type of the dataset. Defaults to None, which
        means that the type will be obtained from the input datasets. If
        input datasets are not instances of :class:`AvalancheDataset`, the
        type UNDEFINED will be used.

    :returns: A :class:`GenericCLScenario` instance.
    """
    # stacklevel=2 attributes the warning to the code calling this helper,
    # so the reported location is the call site that has to be migrated.
    warnings.warn(
        "create_generic_scenario_from_tensor_lists is deprecated in "
        "favor of create_generic_benchmark_from_tensor_lists.",
        DeprecationWarning,
        stacklevel=2,
    )

    # One dataset per experience; each sub-list of tensors is unpacked as
    # the positional tensors of the dataset.
    train_datasets = [
        AvalancheTensorDataset(*exp_tensors, dataset_type=dataset_type)
        for exp_tensors in train_tensors
    ]

    test_datasets = [
        AvalancheTensorDataset(*exp_tensors, dataset_type=dataset_type)
        for exp_tensors in test_tensors
    ]

    return create_multi_dataset_generic_scenario(
        train_datasets,
        test_datasets,
        task_labels,
        train_transform=train_transform,
        train_target_transform=train_target_transform,
        eval_transform=eval_transform,
        eval_target_transform=eval_target_transform,
        complete_test_set_only=complete_test_set_only,
        dataset_type=dataset_type,
    )
def main(args):
    """Train a Naive strategy on random tensor datasets with a full set of
    evaluation metrics and loggers attached.

    Metrics are sent to an interactive logger, a CSV logger and a text
    logger that appends to ``log.txt``. The text log file is closed when the
    experiment ends, including when it ends with an exception.

    :param args: Parsed command line arguments. Only ``args.cuda`` is read:
        it selects the CUDA device index (the CPU is used when it is
        negative or CUDA is unavailable) and is also passed to the GPU
        usage metrics.
    """
    # --- CONFIG
    device = torch.device(
        f"cuda:{args.cuda}"
        if torch.cuda.is_available() and args.cuda >= 0
        else "cpu"
    )
    # ---------

    # Three random train / test datasets with per-pattern task labels.
    tr_ds = [
        AvalancheTensorDataset(
            torch.randn(10, 3),
            torch.randint(0, 3, (10,)).tolist(),
            task_labels=torch.randint(0, 5, (10,)).tolist(),
        )
        for _ in range(3)
    ]
    ts_ds = [
        AvalancheTensorDataset(
            torch.randn(10, 3),
            torch.randint(0, 3, (10,)).tolist(),
            task_labels=torch.randint(0, 5, (10,)).tolist(),
        )
        for _ in range(3)
    ]
    scenario = create_multi_dataset_generic_benchmark(
        train_datasets=tr_ds, test_datasets=ts_ds
    )
    # ---------

    # MODEL CREATION
    model = SimpleMLP(num_classes=3, input_size=3)

    # DEFINE THE EVALUATION PLUGIN AND LOGGER
    # The evaluation plugin manages the metrics computation.
    # It takes as argument a list of metrics and a list of loggers.
    # The evaluation plugin calls the loggers to serialize the metrics
    # and save them in persistent memory or print them in the standard
    # output.

    # The log file is managed by a context manager so that it is always
    # closed once the experiment is over.
    with open("log.txt", "a") as log_file:
        # log to text file
        text_logger = TextLogger(log_file)

        # print to stdout
        interactive_logger = InteractiveLogger()

        csv_logger = CSVLogger()

        eval_plugin = EvaluationPlugin(
            accuracy_metrics(
                minibatch=True,
                epoch=True,
                epoch_running=True,
                experience=True,
                stream=True,
            ),
            loss_metrics(
                minibatch=True,
                epoch=True,
                epoch_running=True,
                experience=True,
                stream=True,
            ),
            forgetting_metrics(experience=True, stream=True),
            bwt_metrics(experience=True, stream=True),
            cpu_usage_metrics(
                minibatch=True,
                epoch=True,
                epoch_running=True,
                experience=True,
                stream=True,
            ),
            timing_metrics(
                minibatch=True,
                epoch=True,
                epoch_running=True,
                experience=True,
                stream=True,
            ),
            ram_usage_metrics(
                every=0.5,
                minibatch=True,
                epoch=True,
                experience=True,
                stream=True,
            ),
            gpu_usage_metrics(
                args.cuda,
                every=0.5,
                minibatch=True,
                epoch=True,
                experience=True,
                stream=True,
            ),
            disk_usage_metrics(
                minibatch=True, epoch=True, experience=True, stream=True
            ),
            MAC_metrics(minibatch=True, epoch=True, experience=True),
            loggers=[interactive_logger, text_logger, csv_logger],
            collect_all=True,
        )  # collect all metrics (set to True by default)

        # CREATE THE STRATEGY INSTANCE (NAIVE)
        cl_strategy = Naive(
            model,
            SGD(model.parameters(), lr=0.001, momentum=0.9),
            CrossEntropyLoss(),
            train_mb_size=500,
            train_epochs=1,
            eval_mb_size=100,
            device=device,
            evaluator=eval_plugin,
            eval_every=1,
        )

        # TRAINING LOOP
        print("Starting experiment...")
        results = []
        for experience in scenario.train_stream:
            print("Start of experience: ", experience.current_experience)
            print("Current Classes: ", experience.classes_in_this_experience)

            # train returns a dictionary containing last recorded value
            # for each metric (not needed here, so it is not kept).
            cl_strategy.train(experience, eval_streams=[scenario.test_stream])
            print("Training completed")

            print("Computing accuracy on the whole test set")
            # test returns a dictionary with the last metric collected
            # during evaluation on that stream
            results.append(cl_strategy.eval(scenario.test_stream))

        print(f"Test metrics:\n{results}")

        # Dict with all the metric curves,
        # only available when `collect_all` is True.
        # Each entry is a (x, metric value) tuple.
        # You can use this dictionary to manipulate the
        # metrics without avalanche.
        all_metrics = cl_strategy.evaluator.get_all_metrics()
        print(f"Stored metrics: {list(all_metrics.keys())}")
def create_generic_benchmark_from_tensor_lists(
        train_tensors: Sequence[Sequence[Any]],
        test_tensors: Sequence[Sequence[Any]],
        *,
        other_streams_tensors: Dict[str, Sequence[Sequence[Any]]] = None,
        task_labels: Sequence[int],
        complete_test_set_only: bool = False,
        train_transform=None,
        train_target_transform=None,
        eval_transform=None,
        eval_target_transform=None,
        other_streams_transforms: Dict[str, Tuple[Any, Any]] = None,
        dataset_type: AvalancheDatasetType = None
) -> GenericCLScenario:
    """
    Builds a benchmark instance out of lists of Tensors.

    Every tuple of Tensors (x, y, z, ...) is turned into its own dataset and
    every such dataset is one experience of the stream it belongs to. This
    is the lowest-level helper available for creating a Continual Learning
    benchmark: prefer higher level helpers whenever they fit.

    The `train_tensors`, `test_tensors` (and `other_streams_tensors`)
    parameters describe the experiences: each of them is a list holding one
    list of tensors per experience, where each tensor carries the values of
    one feature ("x", "y", "z", ...) for all the patterns of the experience.

    By default the second tensor of each experience is used to fill the
    `targets` value (label of each pattern).

    Task labels can only be given as a single value per experience: the
    same task label is applied to all patterns of the experiences that share
    the same position in different streams. For a finer-grained definition
    of task labels use :func:`create_multi_dataset_generic_benchmark` with
    properly initialized :class:`AvalancheDataset` instances.

    :param train_tensors: A list of lists. The n-th inner list holds the
        tensors (one per feature) of the n-th training experience.
    :param test_tensors: A list of lists. The n-th inner list holds the
        tensors (one per feature) of the n-th test experience. When using
        `complete_test_set_only`, it must contain a single inner list for
        the single test experience.
    :param other_streams_tensors: A dictionary describing the content of
        custom streams. Keys must be valid stream names (letters and
        numbers, not starting with a number), values follow the same
        structure of `train_tensors` and `test_tensors`. Entries named
        "train" or "test" override the `train_tensors` and `test_tensors`
        parameters.
    :param task_labels: A list of task labels with at least one value per
        experience. Each value is the task label applied to all patterns of
        the experience found at that position.
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the benchmark, which means that ``test_tensors`` must
        define a single experience. Defaults to False.
    :param train_transform: The transformation to apply to the training
        data, e.g. a random crop, a normalization or a concatenation of
        different transformations (see torchvision.transform documentation
        for a comprehensive list of possible transformations). Defaults to
        None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param other_streams_transforms: Transformations to apply to custom
        streams, as a dictionary mapping stream names to two-element tuples
        (X transformation, Y transformation) whose elements can be None.
        Custom streams without an entry use the "train" transformations.
        Entries named "train" or "test" override the `train_transform`,
        `train_target_transform`, `eval_transform` and
        `eval_target_transform` parameters.
    :param dataset_type: The type of the dataset. Defaults to None. The
        value is forwarded as-is to every created dataset and to
        :func:`create_multi_dataset_generic_benchmark`.

    :returns: A :class:`GenericCLScenario` instance.
    """
    # Custom streams are merged last, so that "train" / "test" entries found
    # there take precedence over the positional parameters.
    input_streams = {"train": train_tensors, "test": test_tensors}
    if other_streams_tensors is not None:
        input_streams.update(other_streams_tensors)

    # One dataset per experience, labelled with the task label found at the
    # position of the experience in its stream.
    stream_definitions = {
        stream_name: [
            AvalancheTensorDataset(
                *exp_tensors,
                dataset_type=dataset_type,
                task_labels=task_labels[exp_id]
            )
            for exp_id, exp_tensors in enumerate(list_of_exps_tensors)
        ]
        for stream_name, list_of_exps_tensors in input_streams.items()
    }

    # All streams (train and test included) are passed as custom streams.
    return create_multi_dataset_generic_benchmark(
        [],
        [],
        other_streams_datasets=stream_definitions,
        train_transform=train_transform,
        train_target_transform=train_target_transform,
        eval_transform=eval_transform,
        eval_target_transform=eval_target_transform,
        complete_test_set_only=complete_test_set_only,
        other_streams_transforms=other_streams_transforms,
        dataset_type=dataset_type,
    )
def test_lazy_benchmark_with_validation_stream_fixed_size(self):
    """Check ``benchmark_with_validation_stream`` on a lazy benchmark with
    a fixed validation size of 20, for every ``lazy_splitting`` option.

    For each option the test verifies the laziness of the resulting train
    stream, the length of all streams, that experiences are only loaded
    when expected and that train / validation / test contents match the
    expected slices of the source tensors.
    """
    for lazy_option in (None, True, False):
        with self.subTest(lazy_option=lazy_option):
            pattern_shape = (3, 32, 32)

            # Definition of training experiences
            # Experience 1
            experience_1_x = torch.zeros(100, *pattern_shape)
            experience_1_y = torch.zeros(100, dtype=torch.long)
            experience_1_dataset = AvalancheTensorDataset(
                experience_1_x, experience_1_y)

            # Experience 2
            experience_2_x = torch.zeros(80, *pattern_shape)
            experience_2_y = torch.ones(80, dtype=torch.long)
            experience_2_dataset = AvalancheTensorDataset(
                experience_2_x, experience_2_y)

            # Test experience
            test_x = torch.zeros(50, *pattern_shape)
            test_y = torch.zeros(50, dtype=torch.long)
            experience_test = AvalancheTensorDataset(test_x, test_y)

            def train_gen():
                # Lazy generator of the training stream
                yield from [experience_1_dataset, experience_2_dataset]

            def test_gen():
                # Lazy generator of the test stream
                yield from [experience_test]

            initial_benchmark_instance = create_lazy_generic_benchmark(
                train_generator=LazyStreamDefinition(
                    train_gen(), 2, [0, 0]),
                test_generator=LazyStreamDefinition(test_gen(), 1, [0]),
                complete_test_set_only=True,
                dataset_type=AvalancheDatasetType.CLASSIFICATION,
            )

            valid_benchmark = benchmark_with_validation_stream(
                initial_benchmark_instance,
                20,
                shuffle=False,
                lazy_splitting=lazy_option,
            )

            # The expected outcome is lazy unless lazy_splitting is False.
            expect_laziness = lazy_option is None or bool(lazy_option)

            def loaded_experience(stream_name, exp_idx):
                # Experience data if already loaded (no loading triggered).
                stream_def = valid_benchmark.stream_definitions[stream_name]
                return stream_def.exps_data.get_experience_if_loaded(exp_idx)

            def stream_dataset(stream_name, exp_idx):
                # Dataset of an experience (accessed through the stream).
                stream = getattr(valid_benchmark, stream_name + "_stream")
                return stream[exp_idx].dataset

            self.assertEqual(
                expect_laziness,
                valid_benchmark.stream_definitions["train"].is_lazy,
            )

            self.assertEqual(2, len(valid_benchmark.train_stream))
            self.assertEqual(2, len(valid_benchmark.valid_stream))
            self.assertEqual(1, len(valid_benchmark.test_stream))
            self.assertTrue(valid_benchmark.complete_test_set_only)

            # Before being accessed, an experience is expected to be
            # unloaded exactly when the benchmark is expected to be lazy.
            expected_sizes = (
                ("train", 0, 80),
                ("train", 1, 60),
                ("valid", 0, 20),
                ("valid", 1, 20),
            )
            for stream_name, exp_idx, expected_len in expected_sizes:
                maybe_exp = loaded_experience(stream_name, exp_idx)
                self.assertEqual(expect_laziness, maybe_exp is None)
                self.assertEqual(
                    expected_len,
                    len(stream_dataset(stream_name, exp_idx)))

            # After the accesses above every experience must be loaded.
            for exp_idx in (0, 1):
                for stream_name in ("train", "valid"):
                    self.assertIsNotNone(
                        loaded_experience(stream_name, exp_idx))

            # (expected tensor, stream, experience index, field index)
            expected_contents = (
                (experience_1_x[:80], "train", 0, 0),
                (experience_2_x[:60], "train", 1, 0),
                (experience_1_y[:80], "train", 0, 1),
                (experience_2_y[:60], "train", 1, 1),
                (experience_1_x[80:], "valid", 0, 0),
                (experience_2_x[60:], "valid", 1, 0),
                (experience_1_y[80:], "valid", 0, 1),
                (experience_2_y[60:], "valid", 1, 1),
                (test_x, "test", 0, 0),
                (test_y, "test", 0, 1),
            )
            for expected, stream_name, exp_idx, field in expected_contents:
                actual = stream_dataset(stream_name, exp_idx)[:][field]
                self.assertTrue(torch.equal(expected, actual))
def test_avalanche_concat_dataset_collate_fn_inheritance(self):
    """Check which collate function is applied when slicing a dataset and
    a concatenation built on top of it, and how dataset types and custom
    collate functions interact when concatenating datasets."""
    tensor_x = torch.rand(200, 3, 28, 28)
    tensor_y = torch.randint(0, 100, (200, ))
    tensor_z = torch.randint(0, 100, (200, ))

    tensor_x2 = torch.rand(200, 3, 28, 28)
    tensor_y2 = torch.randint(0, 100, (200, ))
    tensor_z2 = torch.randint(0, 100, (200, ))

    def make_collate_fn(label_offset, z_fill):
        # Collate function shifting the labels by label_offset and
        # replacing the third field with z_fill.
        def collate(patterns):
            x_values = torch.stack([pat[0] for pat in patterns], 0)
            y_values = torch.tensor(
                [pat[1] for pat in patterns]) + label_offset
            z_values = torch.tensor([z_fill for _ in patterns])
            t_values = torch.tensor([pat[3] for pat in patterns])
            return x_values, y_values, z_values, t_values
        return collate

    my_collate_fn = make_collate_fn(1, -1)
    my_collate_fn2 = make_collate_fn(2, -2)

    dataset1 = TensorDataset(tensor_x, tensor_y, tensor_z)
    dataset2 = AvalancheTensorDataset(
        tensor_x2, tensor_y2, tensor_z2, collate_fn=my_collate_fn)
    concat = AvalancheConcatDataset(
        [dataset1, dataset2], collate_fn=my_collate_fn2)  # Ok

    # dataset2 must use its own collate function (+1 / -1).
    x, y, z, t = dataset2[0:5]
    self.assertIsInstance(x, Tensor)
    self.assertTrue(torch.equal(tensor_x2[0:5], x))
    self.assertTrue(torch.equal(tensor_y2[0:5] + 1, y))
    self.assertTrue(
        torch.equal(torch.full((5, ), -1, dtype=torch.long), z))
    self.assertTrue(torch.equal(torch.zeros(5, dtype=torch.long), t))

    # The concatenation must use the collate function it was given
    # (+2 / -2).
    x2, y2, z2, t2 = concat[0:5]
    self.assertIsInstance(x2, Tensor)
    self.assertTrue(torch.equal(tensor_x[0:5], x2))
    self.assertTrue(torch.equal(tensor_y[0:5] + 2, y2))
    self.assertTrue(
        torch.equal(torch.full((5, ), -2, dtype=torch.long), z2))
    self.assertTrue(torch.equal(torch.zeros(5, dtype=torch.long), t2))

    dataset1_classification = AvalancheTensorDataset(
        tensor_x, tensor_y, tensor_z,
        dataset_type=AvalancheDatasetType.CLASSIFICATION)

    dataset2_segmentation = AvalancheDataset(
        dataset2, dataset_type=AvalancheDatasetType.SEGMENTATION)

    # Concatenating datasets of different types is an error.
    with self.assertRaises(ValueError):
        _ = dataset1_classification + dataset2_segmentation

    # So is passing a custom collate function here.
    with self.assertRaises(ValueError):
        _ = AvalancheConcatDataset(
            [dataset1, dataset2_segmentation],
            collate_fn=my_collate_fn)

    # Concatenating with dataset2 keeps the CLASSIFICATION type,
    # whatever the order of the operands.
    ok_concat_classification = dataset1_classification + dataset2
    self.assertEqual(AvalancheDatasetType.CLASSIFICATION,
                     ok_concat_classification.dataset_type)

    ok_concat_classification2 = dataset2 + dataset1_classification
    self.assertEqual(AvalancheDatasetType.CLASSIFICATION,
                     ok_concat_classification2.dataset_type)
def create_generic_scenario_from_tensors(
        train_data_x: Sequence[Any],
        train_data_y: Sequence[Sequence[SupportsInt]],
        test_data_x: Union[Any, Sequence[Any]],
        test_data_y: Union[Any, Sequence[Sequence[SupportsInt]]],
        task_labels: Sequence[int],
        complete_test_set_only: bool = False,
        train_transform=None,
        train_target_transform=None,
        test_transform=None,
        test_target_transform=None) -> GenericCLScenario:
    """
    Builds a generic scenario out of lists of Tensors and the respective
    task labels.

    Each (x, y) pair of Tensors is turned into its own dataset and each
    training dataset is a separate training experience. Contents of the
    datasets will not be changed, including the targets.

    This is the lower level way to create a Continual Learning scenario:
    prefer higher level helpers whenever they fit.

    In its base form, the test lists must contain the same amount of
    elements of the training lists. Those pairs of datasets are then used to
    create the "past", "cumulative" (a.k.a. growing) and "future" test sets.
    However, in certain Continual Learning scenarios only the concept of
    "complete" test set makes sense. In that case, the
    ``complete_test_set_only`` should be set to True (see the parameter
    description for more info).

    :param train_data_x: A list of Tensors (one per experience) containing
        the patterns of the training sets.
    :param train_data_y: A list of Tensors or int lists containing the
        labels of the patterns of the training sets. Must contain the same
        number of elements of ``train_data_x``.
    :param test_data_x: A Tensor or a list of Tensors (one per experience)
        containing the patterns of the test sets.
    :param test_data_y: A Tensor or a list of Tensors or int lists
        containing the labels of the patterns of the test sets. Must be of
        the same type of ``test_data_x`` and, when a list, must contain the
        same number of elements.
    :param task_labels: A list of task labels. Must contain the same amount
        of elements of the ``train_data_x`` parameter. For
        Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is
        usually a list of zeros. For Multi Task scenario, this is usually a
        list of ascending task labels (starting from 0).
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the scenario. This means that ``test_data_x`` and
        ``test_data_y`` must describe a single test set (the complete one).
        Defaults to False, which means that the train and test parameters
        must describe the same amount of experiences.
    :param train_transform: The transformation to apply to training
        patterns. Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param test_transform: The transformation to apply to test patterns.
        Defaults to None.
    :param test_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.

    :returns: A :class:`GenericCLScenario` instance.
    :raises ValueError: If the train x / y lists differ in length, if the
        test x / y values are not of the same type or if the test x / y
        lists differ in length.
    """
    if len(train_data_x) != len(train_data_y):
        raise ValueError('train_data_x and train_data_y must contain'
                         ' the same amount of elements')

    if type(test_data_x) != type(test_data_y):
        raise ValueError('test_data_x and test_data_y must be of'
                         ' the same type')

    if isinstance(test_data_x, Tensor):
        # A single test set was given: wrap it to obtain one-element lists.
        test_data_x, test_data_y = [test_data_x], [test_data_y]
    elif len(test_data_x) != len(test_data_y):
        raise ValueError('test_data_x and test_data_y must contain'
                         ' the same amount of elements')

    # Every dataset carries both groups of transformations; only the
    # initially selected group differs between train and test datasets.
    transform_groups = {
        'train': (train_transform, train_target_transform),
        'test': (test_transform, test_target_transform),
    }

    def build_datasets(data_x, data_y, initial_group):
        # One dataset per (patterns, targets) pair.
        return [
            AvalancheTensorDataset(
                dataset_x, dataset_y,
                transform_groups=transform_groups,
                initial_transform_group=initial_group)
            for dataset_x, dataset_y in zip(data_x, data_y)
        ]

    train_datasets = build_datasets(train_data_x, train_data_y, 'train')
    test_datasets = build_datasets(test_data_x, test_data_y, 'test')

    return create_multi_dataset_generic_scenario(
        train_datasets, test_datasets, task_labels,
        complete_test_set_only=complete_test_set_only)
def test_lazy_benchmark_with_validation_stream_fixed_size(self):
    """
    Check the fixed-size validation split of a lazily generated benchmark.

    A lazy benchmark with two training experiences (100 and 80 patterns)
    and one test experience (50 patterns) is passed to
    ``benchmark_with_validation_stream`` with a validation size of 20 and
    ``shuffle=False``. The test then verifies that the last 20 patterns
    of each training experience end up in the ``valid`` stream, that the
    remaining patterns stay in the ``train`` stream and that the test
    stream is left untouched.
    """
    shape = (3, 32, 32)

    # Training experiences (patterns are all zeros, targets differ)
    first_x = torch.zeros(100, *shape)
    first_y = torch.zeros(100, dtype=torch.long)
    first_dataset = AvalancheTensorDataset(first_x, first_y)

    second_x = torch.zeros(80, *shape)
    second_y = torch.ones(80, dtype=torch.long)
    second_dataset = AvalancheTensorDataset(second_x, second_y)

    # Single test experience
    test_x = torch.zeros(50, *shape)
    test_y = torch.zeros(50, dtype=torch.long)
    test_dataset = AvalancheTensorDataset(test_x, test_y)

    # Lazy (generator based) definitions of the two streams
    train_source = (ds for ds in [first_dataset, second_dataset])
    test_source = (ds for ds in [test_dataset])

    lazy_benchmark = create_lazy_generic_benchmark(
        train_generator=LazyStreamDefinition(train_source, 2, [0, 0]),
        test_generator=LazyStreamDefinition(test_source, 1, [0]),
        complete_test_set_only=True,
        dataset_type=AvalancheDatasetType.CLASSIFICATION)

    valid_benchmark = benchmark_with_validation_stream(
        lazy_benchmark, 20, shuffle=False)

    train_stream = valid_benchmark.train_stream
    valid_stream = valid_benchmark.valid_stream
    test_stream = valid_benchmark.test_stream

    # Stream lengths
    self.assertEqual(2, len(train_stream))
    self.assertEqual(2, len(valid_stream))
    self.assertEqual(1, len(test_stream))
    self.assertTrue(valid_benchmark.complete_test_set_only)

    # Number of patterns left in / moved out of each experience
    expected_sizes = (
        (80, train_stream, 0),
        (60, train_stream, 1),
        (20, valid_stream, 0),
        (20, valid_stream, 1),
    )
    for size, stream, exp_id in expected_sizes:
        self.assertEqual(size, len(stream[exp_id].dataset))

    # (expected tensor, stream, experience index, field index) where
    # field 0 is the pattern tensor and field 1 the target tensor
    expected_content = (
        (first_x[:80], train_stream, 0, 0),
        (second_x[:60], train_stream, 1, 0),
        (first_y[:80], train_stream, 0, 1),
        (second_y[:60], train_stream, 1, 1),
        (first_x[80:], valid_stream, 0, 0),
        (second_x[60:], valid_stream, 1, 0),
        (first_y[80:], valid_stream, 0, 1),
        (second_y[60:], valid_stream, 1, 1),
        (test_x, test_stream, 0, 0),
        (test_y, test_stream, 0, 1),
    )
    for wanted, stream, exp_id, field in expected_content:
        self.assertTrue(
            torch.equal(wanted, stream[exp_id].dataset[:][field]))
def CTrL(
    stream_name: str,
    save_to_disk: bool = False,
    path: Path = default_dataset_location(""),
    seed: int = None,
    n_tasks: int = None,
):
    """
    Gives access to the Continual Transfer Learning benchmark streams
    introduced in https://arxiv.org/abs/2012.12631.

    :param stream_name: Name of the test stream to generate. Must be one of
        `s_plus`, `s_minus`, `s_in`, `s_out`, `s_pl` and `s_long`.
    :param save_to_disk: Whether to save each stream on the disk or load
        everything in memory. Setting it to `True` will save memory but
        takes more time on the first generation using the corresponding
        seed.
    :param path: The path under which the generated stream will be saved if
        save_to_disk is True.
    :param seed: The seed to use to generate the streams. If no seed is
        given (``None``), a random one is drawn. Any explicit integer,
        including 0, is used as-is.
    :param n_tasks: The number of tasks to generate. This parameter is only
        relevant for the `s_long` stream (where it defaults to 100), as all
        other streams have a fixed number of tasks.
    :raises ValueError: If ``n_tasks`` is given for a stream other than
        `s_long`.
    :return: A scenario containing 3 streams: train, val and test.
    """
    # Only fall back to a random seed when none was supplied. The previous
    # `seed or ...` form also replaced a legitimate seed of 0.
    if seed is None:
        seed = random.randint(0, sys.maxsize)

    if stream_name != "s_long" and n_tasks is not None:
        raise ValueError("The n_tasks parameter can only be used with the "
                         f'"s_long" stream, asked {n_tasks} for {stream_name}')
    elif stream_name == "s_long" and n_tasks is None:
        n_tasks = 100

    stream = ctrl.get_stream(stream_name, seed)

    if save_to_disk:
        folder = path / "ctrl" / stream_name / f"seed_{seed}"

    # Train, val and test experiences
    exps = [[], [], []]
    for t_id, t in enumerate(tqdm(stream, desc=f"Loading {stream_name}")):
        # Per-task normalization built from the task's own statistics
        trans = transforms.Normalize(t.statistics["mean"],
                                     t.statistics["std"])
        for split, split_name, exp in zip(t.datasets, t.split_names, exps):
            samples, labels = split.tensors
            task_labels = [t.id] * samples.size(0)
            if save_to_disk:
                exp_folder = folder / f"exp_{t_id}" / split_name
                exp_folder.mkdir(parents=True, exist_ok=True)
                files = []
                for i, (sample, label) in enumerate(zip(samples, labels)):
                    sample_path = exp_folder / f"sample_{i}.png"
                    # Samples already written by a previous run with the
                    # same seed are reused instead of being re-encoded.
                    if not sample_path.exists():
                        F.to_pil_image(sample).save(sample_path)
                    files.append((sample_path, label.item()))

                common_root, exp_paths_list = common_paths_root(files)
                paths_dataset = PathsDataset(common_root, exp_paths_list)
                dataset = AvalancheDataset(
                    paths_dataset,
                    task_labels=task_labels,
                    transform=transforms.Compose(
                        [transforms.ToTensor(), trans]),
                )
            else:
                dataset = AvalancheTensorDataset(
                    samples,
                    labels.squeeze(1),
                    task_labels=task_labels,
                    transform=trans,
                )
            exp.append(dataset)

        # Only the s_long stream is truncated to the requested task count
        if stream_name == "s_long" and t_id == n_tasks - 1:
            break

    return dataset_benchmark(
        train_datasets=exps[0],
        test_datasets=exps[2],
        other_streams_datasets=dict(val=exps[1]),
    )
def test_classes_in_exp(self):
    """
    Check ``classes_in_experience`` of a benchmark built from datasets.

    Each stream is filled with random tensor datasets whose targets are
    drawn from a known integer range. The test verifies that the classes
    reported for every experience fall inside that range, both through
    the per-stream dictionary access and through the deprecated direct
    indexing (which refers to the train stream).
    """

    def random_dataset(low, high):
        # 200 random patterns with targets in [low, high) and task
        # labels in [0, 5)
        patterns = torch.rand(200, 3, 28, 28)
        targets = torch.randint(low, high, (200, ))
        tasks = torch.randint(0, 5, (200, ))
        return AvalancheTensorDataset(patterns, targets, task_labels=tasks)

    def assert_within(classes, low, high):
        # All classes must lie in the half-open interval [low, high)
        smallest = min(classes)
        largest = max(classes)
        self.assertGreaterEqual(smallest, low)
        self.assertLess(largest, high)

    train_exps = [random_dataset(0, 70), random_dataset(0, 100)]
    test_exps = [random_dataset(100, 200)]
    other_stream_exps = [random_dataset(400, 600)]

    benchmark_instance = dataset_benchmark(
        train_datasets=train_exps,
        test_datasets=test_exps,
        other_streams_datasets={'other': other_stream_exps})

    # Access through the stream name
    first_train = benchmark_instance.classes_in_experience['train'][0]
    second_train = benchmark_instance.classes_in_experience['train'][1]
    assert_within(first_train, 0, 70)
    assert_within(second_train, 0, 100)

    # Test deprecated behavior
    first_train = benchmark_instance.classes_in_experience[0]
    second_train = benchmark_instance.classes_in_experience[1]
    assert_within(first_train, 0, 70)
    assert_within(second_train, 0, 100)
    # End test deprecated behavior

    first_test = benchmark_instance.classes_in_experience['test'][0]
    assert_within(first_test, 100, 200)

    first_other = benchmark_instance.classes_in_experience['other'][0]
    assert_within(first_other, 400, 600)
def setUpClass(cls) -> None:
    """
    Train a small model once and collect every metric for the test class.

    A three-experience synthetic classification benchmark is built with
    fixed random seeds, a ``SimpleMLP`` is trained on it with
    ``BaseStrategy`` and the values gathered by the evaluation plugin are
    stored in ``cls.all_metrics``. The reference values are then loaded
    from ``target_metrics/tpp.pickle`` (located next to this file) into
    ``cls.ref``.

    The order of the calls that consume random numbers must not change,
    otherwise the collected metrics no longer match the reference file.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)

    n_samples_per_class = 100
    datasets = []
    for _ in range(3):
        dataset = make_classification(n_samples=3 * n_samples_per_class,
                                      n_classes=3,
                                      n_features=3,
                                      n_informative=3,
                                      n_redundant=0)
        X = torch.from_numpy(dataset[0]).float()
        y = torch.from_numpy(dataset[1]).long()
        train_X, test_X, train_y, test_y = train_test_split(
            X, y, train_size=0.5, shuffle=True, stratify=y)
        datasets.append((train_X, train_y, test_X, test_y))

    # Each split holds 150 patterns (half of 3 * n_samples_per_class),
    # hence the size of the random task label vectors below.
    tr_ds = [
        AvalancheTensorDataset(
            tr_X, tr_y,
            dataset_type=AvalancheDatasetType.CLASSIFICATION,
            task_labels=torch.randint(0, 3, (150, )).tolist())
        for tr_X, tr_y, _, _ in datasets
    ]
    ts_ds = [
        AvalancheTensorDataset(
            ts_X, ts_y,
            dataset_type=AvalancheDatasetType.CLASSIFICATION,
            task_labels=torch.randint(0, 3, (150, )).tolist())
        for _, _, ts_X, ts_y in datasets
    ]
    benchmark = dataset_benchmark(train_datasets=tr_ds,
                                  test_datasets=ts_ds)
    model = SimpleMLP(num_classes=3, input_size=3)

    # The context manager guarantees that the log file is closed even
    # when training or evaluation raises.
    with open('log.txt', 'w') as f:
        text_logger = TextLogger(f)
        eval_plugin = EvaluationPlugin(
            accuracy_metrics(minibatch=True, epoch=True,
                             epoch_running=True, experience=True,
                             stream=True, trained_experience=True),
            loss_metrics(minibatch=True, epoch=True, epoch_running=True,
                         experience=True, stream=True),
            forgetting_metrics(experience=True, stream=True),
            confusion_matrix_metrics(num_classes=3, save_image=False,
                                     normalize='all', stream=True),
            bwt_metrics(experience=True, stream=True),
            forward_transfer_metrics(experience=True, stream=True),
            cpu_usage_metrics(minibatch=True, epoch=True,
                              epoch_running=True, experience=True,
                              stream=True),
            timing_metrics(minibatch=True, epoch=True, epoch_running=True,
                           experience=True, stream=True),
            ram_usage_metrics(every=0.5, minibatch=True, epoch=True,
                              experience=True, stream=True),
            disk_usage_metrics(minibatch=True, epoch=True,
                               experience=True, stream=True),
            MAC_metrics(minibatch=True, epoch=True, experience=True),
            loggers=[text_logger],
            collect_all=True)  # collect all metrics (set to True by default)
        cl_strategy = BaseStrategy(model,
                                   SGD(model.parameters(), lr=0.001,
                                       momentum=0.9),
                                   CrossEntropyLoss(),
                                   train_mb_size=2,
                                   train_epochs=2,
                                   eval_mb_size=2,
                                   device=DEVICE,
                                   evaluator=eval_plugin,
                                   eval_every=1)
        for experience in benchmark.train_stream:
            cl_strategy.train(experience,
                              eval_streams=[benchmark.test_stream],
                              shuffle=False)
            cl_strategy.eval(benchmark.test_stream)
        cls.all_metrics = cl_strategy.evaluator.get_all_metrics()

    # # Uncomment me to regenerate the reference metrics. Make sure
    # # the old tests were passing for all unchanged metrics
    # with open(os.path.join(pathlib.Path(__file__).parent.absolute(),
    #                        'target_metrics',
    #                        'tpp.pickle'), 'wb') as f:
    #     pickle.dump(dict(cls.all_metrics), f,
    #                 protocol=4)

    # NOTE: pickle is only safe here if the reference file is a fixture
    # shipped with the test suite; never point this at untrusted data.
    with open(
            os.path.join(
                pathlib.Path(__file__).parent.absolute(),
                'target_metrics', 'tpp.pickle'), 'rb') as f:
        cls.ref = pickle.load(f)
def test_custom_streams_name_and_length(self):
    """
    Check a ``GenericCLScenario`` defined with a custom ``valid`` stream.

    Three streams of different lengths (train: 5, test: 3, valid: 4) are
    created from random tensor datasets. Building the scenario with five
    task label sets for the four validation experiences is expected to
    raise. With four sets, the test checks the stream lengths, the
    original dataset attributes, the first pattern of every experience
    and the task labels reported by each experience.
    """

    def random_experiences(count, n_patterns, n_task_labels):
        # `count` datasets of random patterns with targets in [0, 100)
        # and task labels in [0, n_task_labels)
        built = []
        for _ in range(count):
            patterns = torch.rand(n_patterns, 3, 28, 28)
            targets = torch.randint(0, 100, (n_patterns,))
            tasks = torch.randint(0, n_task_labels, (n_patterns,))
            built.append(
                AvalancheTensorDataset(patterns, targets, task_labels=tasks)
            )
        return built

    train_exps = random_experiences(5, 200, 5)
    test_exps = random_experiences(3, 150, 3)
    valid_exps = random_experiences(4, 220, 5)

    valid_origin_dataset = AvalancheTensorDataset(
        torch.ones(10, 3, 32, 32), torch.zeros(10)
    )

    def build_scenario(valid_task_labels):
        return GenericCLScenario(
            stream_definitions={
                "train": (train_exps,),
                "test": (test_exps,),
                "valid": (
                    valid_exps, valid_task_labels, valid_origin_dataset
                ),
            }
        )

    def assert_first_pattern_matches(source_dataset, experience):
        # The first (x, y, t) tuple of the experience dataset must be the
        # one stored in the dataset used to define that experience
        want_x, want_y, want_t = source_dataset[0]
        have_x, have_y, have_t = experience.dataset[0]
        self.assertTrue(torch.equal(want_x, have_x))
        self.assertTrue(torch.equal(want_y, have_y))
        self.assertEqual(int(want_t), have_t)

    # One task label set too many for the four validation experiences
    too_many_t_labels = [{9}, {4, 5}, {7, 8}, {0}, {3}]
    with self.assertRaises(Exception):
        build_scenario(too_many_t_labels)

    valid_t_labels = too_many_t_labels[:-1]
    benchmark_instance = build_scenario(valid_t_labels)

    self.assertEqual(5, len(benchmark_instance.train_stream))
    self.assertEqual(3, len(benchmark_instance.test_stream))
    self.assertEqual(4, len(benchmark_instance.valid_stream))

    self.assertEqual(None, benchmark_instance.original_train_dataset)
    self.assertEqual(None, benchmark_instance.original_test_dataset)
    self.assertEqual(
        valid_origin_dataset, benchmark_instance.original_valid_dataset
    )

    for i, exp in enumerate(benchmark_instance.train_stream):
        assert_first_pattern_matches(train_exps[i], exp)
        seen_labels = set(exp.task_labels)
        self.assertLess(max(seen_labels), 5)
        self.assertGreaterEqual(min(seen_labels), 0)

    for i, exp in enumerate(benchmark_instance.test_stream):
        assert_first_pattern_matches(test_exps[i], exp)
        seen_labels = set(exp.task_labels)
        self.assertLess(max(seen_labels), 3)
        self.assertGreaterEqual(min(seen_labels), 0)

    for i, exp in enumerate(benchmark_instance.valid_stream):
        assert_first_pattern_matches(valid_exps[i], exp)
        seen_labels = set(exp.task_labels)
        # The explicitly given label sets take the place of the labels
        # stored in the datasets
        self.assertEqual(valid_t_labels[i], seen_labels)
def test_data_incremental_benchmark_from_lazy_benchmark(self):
    """
    Check ``data_incremental_benchmark`` applied to a lazy benchmark.

    Two training experiences (100 and 80 patterns) are split in
    mini-experiences of 12 patterns without shuffling and without
    dropping the last, smaller, mini-experience. This gives 9 + 7 = 16
    training experiences, whose content must follow the order of the
    original tensors. The single test experience must stay unchanged.
    """
    shape = (3, 32, 32)

    # Training experiences
    first_x = torch.zeros(100, *shape)
    first_y = torch.zeros(100, dtype=torch.long)
    first_dataset = AvalancheTensorDataset(first_x, first_y)

    second_x = torch.zeros(80, *shape)
    second_y = torch.ones(80, dtype=torch.long)
    second_dataset = AvalancheTensorDataset(second_x, second_y)

    # Test experience
    test_x = torch.zeros(50, *shape)
    test_y = torch.zeros(50, dtype=torch.long)
    test_dataset = AvalancheTensorDataset(test_x, test_y)

    # Lazy (generator based) definitions of the two streams
    train_source = (ds for ds in [first_dataset, second_dataset])
    test_source = (ds for ds in [test_dataset])

    lazy_benchmark = create_lazy_generic_benchmark(
        train_generator=LazyStreamDefinition(train_source, 2, [0, 0]),
        test_generator=LazyStreamDefinition(test_source, 1, [0]),
        complete_test_set_only=True,
        dataset_type=AvalancheDatasetType.CLASSIFICATION,
    )

    data_incremental_instance = data_incremental_benchmark(
        lazy_benchmark, 12, shuffle=False, drop_last=False)

    self.assertEqual(16, len(data_incremental_instance.train_stream))
    self.assertEqual(1, len(data_incremental_instance.test_stream))
    self.assertTrue(data_incremental_instance.complete_test_set_only)

    # Mini-experiences holding the leftover patterns of the 1st (id 8)
    # and 2nd (id 15) original experience; all the others hold 12.
    leftover_sizes = {8: 4, 15: 8}

    cursor = 0
    ref_x, ref_y = first_x, first_y
    for exp in data_incremental_instance.train_stream:
        self.assertEqual(
            leftover_sizes.get(exp.current_experience, 12),
            len(exp.dataset))

        # Once the first original experience has been consumed, compare
        # against the tensors of the second one
        if cursor >= 100:
            ref_x, ref_y = second_x, second_y
            cursor = 0

        for x, y, *_ in exp.dataset:
            self.assertTrue(torch.equal(ref_x[cursor], x))
            self.assertTrue(torch.equal(ref_y[cursor], y))
            cursor += 1

    exp = data_incremental_instance.test_stream[0]
    self.assertEqual(50, len(exp.dataset))

    for position, (x, y, *_) in enumerate(exp.dataset):
        self.assertTrue(torch.equal(test_x[position], x))
        self.assertTrue(torch.equal(test_y[position], y))