# NOTE: imports were missing from this excerpt. The continuum paths below are
# the usual public locations; the exact modules for _ContinuumDataset,
# get_scenario_remapping and remap_class_vector may vary across versions.
import os
from typing import Callable, List, Optional, Union

import numpy as np
import pytest
import torch
from PIL import Image
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms

from continuum.datasets import InMemoryDataset, _ContinuumDataset
from continuum.generators import ClassOrderGenerator, TaskOrderGenerator
from continuum.scenarios import (ClassIncremental, ContinualScenario,
                                 HashedScenario, InstanceIncremental,
                                 TransformationIncremental, encode_scenario,
                                 get_scenario_remapping, remap_class_vector)
from continuum.tasks import TaskType, split_train_val


def test_slice(dataset, keep_classes, discard_classes, keep_tasks, discard_tasks, error, ids):
    dataset = InMemoryDataset(*dataset)

    if error:
        with pytest.raises(Exception):
            dataset.slice(keep_classes, discard_classes, keep_tasks, discard_tasks)
        return

    sliced_dataset = dataset.slice(keep_classes, discard_classes, keep_tasks, discard_tasks)

    x, _, _ = sliced_dataset.get_data()
    assert (np.unique(x) == np.array(ids)).all(), (np.unique(x), ids)

def test_continuum_to_pytorch_dataset():
    x_train = np.random.randint(0, 255, size=(20, 32, 32, 3))
    y_train = []
    for i in range(10):
        y_train.append(np.ones(2) * i)
    y_train = np.concatenate(y_train)

    continuum_dataset = InMemoryDataset(x_train, y_train)
    task_set = continuum_dataset.to_taskset()

    # Simply check that the taskset is consumable by a PyTorch DataLoader.
    loader = DataLoader(task_set, batch_size=32)
    for x, y, _ in loader:
        pass

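# gen_data() and NB_CLASSES are referenced throughout these tests but were not
# part of this excerpt. A minimal sketch: the shapes (32x32x3 uint8 images),
# the class count (NB_CLASSES = 10, two samples per class) and the absence of
# explicit task ids are assumptions chosen to satisfy the assertions below,
# not the original definitions.
NB_CLASSES = 10


def gen_data():
    x_train = np.random.randint(0, 255, size=(2 * NB_CLASSES, 32, 32, 3), dtype=np.uint8)
    y_train = np.repeat(np.arange(NB_CLASSES), 2)  # [0, 0, 1, 1, ..., 9, 9]
    x_test = np.random.randint(0, 255, size=(2 * NB_CLASSES, 32, 32, 3), dtype=np.uint8)
    y_test = np.repeat(np.arange(NB_CLASSES), 2)
    return (x_train, y_train, None), (x_test, y_test, None)
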
def test_slicing_list(list_tasks):
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    taskset = scenario[list_tasks]
    targets = np.sort(np.unique(taskset._y))
    assert len(targets) == len(list_tasks), f"{len(targets)} - vs - {len(list_tasks)}"

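# numpy_data is used both as a plain helper (called directly below) and as a
# pytest fixture (injected into the transformation tests). It was not defined
# in this excerpt; a minimal sketch, again assuming 32x32x3 uint8 images with
# NB_CLASSES classes. The fixture wrapper re-exposes the helper under the same
# name for the tests that request it as an argument.
def numpy_data():
    x = np.random.randint(0, 255, size=(2 * NB_CLASSES, 32, 32, 3), dtype=np.uint8)
    y = np.repeat(np.arange(NB_CLASSES), 2)
    return x, y


@pytest.fixture(name="numpy_data")
def numpy_data_fixture():
    return numpy_data()
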
def test_HashedScenario_save_indexes(tmpdir, hash_name):
    num_tasks = 2
    x, y = numpy_data()
    dataset = InMemoryDataset(x, y, None, data_type=TaskType.IMAGE_ARRAY)
    filename_indexes = os.path.join(tmpdir, f"{hash_name}.npy")

    if os.path.exists(filename_indexes):
        os.remove(filename_indexes)
    assert not os.path.exists(filename_indexes), \
        f"{filename_indexes} should have been deleted."

    # Test saving the indexes array.
    scenario = HashedScenario(cl_dataset=dataset,
                              hash_name=hash_name,
                              nb_tasks=num_tasks,
                              filename_hash_indexes=filename_indexes)
    assert os.path.exists(filename_indexes)

    # Test loading the indexes array.
    scenario = HashedScenario(cl_dataset=dataset,
                              hash_name=hash_name,
                              nb_tasks=num_tasks,
                              filename_hash_indexes=filename_indexes)

    # Clean up the test indexes.
    os.remove(filename_indexes)

def test_get_task_transformation(numpy_data):
    x, y = numpy_data
    dummy = InMemoryDataset(x, y)

    Trsf_0 = []
    Trsf_1 = [transforms.RandomAffine(degrees=[40, 50])]
    Trsf_2 = [transforms.RandomAffine(degrees=[85, 95])]
    dummy_transf = [Trsf_0, Trsf_1, Trsf_2]
    base_transformations = [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ]

    scenario = TransformationIncremental(
        cl_dataset=dummy,
        incremental_transformations=dummy_transf,
        base_transformations=base_transformations)

    for task_id, taskset in enumerate(scenario):
        # Task-specific transformations come first, then the base ones.
        tot_transf_task = transforms.Compose(dummy_transf[task_id] + base_transformations)

        # We compare the str representations of the compositions.
        assert repr(tot_transf_task) == repr(scenario.get_task_transformation(task_id))

def test_create_subscenario_list(list_tasks):
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, list_tasks)
    assert subscenario.nb_tasks == len(list_tasks), \
        f"{subscenario.nb_tasks} - vs - {len(list_tasks)}"

def scenario():
    x = np.random.randn(100, 2)
    y = np.concatenate([np.ones(10) * i for i in range(10)])
    t = None
    dataset = InMemoryDataset(x, y, t)
    return ClassIncremental(dataset, increment=2)

def test_init_shared_label_space(numpy_data, shared_label_space):
    x, y = numpy_data
    dummy = InMemoryDataset(x, y)

    Trsf_0 = []
    Trsf_1 = [transforms.RandomAffine(degrees=[40, 50])]
    Trsf_2 = [transforms.RandomAffine(degrees=[85, 95])]
    dummy_transf = [Trsf_0, Trsf_1, Trsf_2]

    scenario = TransformationIncremental(
        cl_dataset=dummy,
        incremental_transformations=dummy_transf,
        shared_label_space=shared_label_space
    )

    for task_id, taskset in enumerate(scenario):
        assert taskset.nb_classes == NB_CLASSES
        classes = taskset.get_classes()
        if shared_label_space:
            # Every task reuses the same label range.
            assert classes.max() == NB_CLASSES - 1
            assert classes.min() == 0
        else:
            # Each task gets its own, shifted label range.
            assert classes.max() == (NB_CLASSES * (task_id + 1)) - 1
            assert classes.min() == NB_CLASSES * task_id

def __init__(
        self,
        cl_dataset: _ContinuumDataset,
        hash_name,
        nb_tasks=None,
        transformations: Union[List[Callable], List[List[Callable]]] = None,
        filename_hash_indexes: Optional[str] = None,
        split_task="balanced"
) -> None:
    self.hash_name = hash_name
    self.split_task = split_task
    self._nb_tasks = nb_tasks

    if self.hash_name not in [
            "AverageHash", "Phash", "PhashSimple", "DhashH", "DhashV",
            "Whash", "ColorHash"
    ]:  # "CropResistantHash" is not supported yet
        raise AssertionError(f"{self.hash_name} is not an available hash_name.")

    if self.split_task not in ["balanced", "auto"]:
        raise AssertionError(f"{self.split_task} is not an available split_task parameter.")

    if split_task == "balanced" and nb_tasks is None:
        raise AssertionError(f"When split_task is {self.split_task}, nb_tasks should be set.")

    self.data_type = cl_dataset.data_type
    self.filename_hash_indexes = filename_hash_indexes

    # "CropResistantHash" does not work yet
    # if self.hash_name == "CropResistantHash":
    #     # auto (kmeans) does not work with the hash format of CropResistantHash
    #     self.split_task = "balanced"

    x, y, t = self.generate_task_ids(cl_dataset)
    cl_dataset = InMemoryDataset(x, y, t, data_type=self.data_type)
    super().__init__(cl_dataset=cl_dataset, transformations=transformations)

def create_subscenario(base_scenario, task_indexes):
    """Create a subscenario from a scenario's tasks, either by subsampling
    tasks, reordering them, or both.
    """
    new_x, new_y, new_t = None, None, None

    if base_scenario.cl_dataset.bounding_boxes is not None:
        raise ValueError(
            "create_subscenario is not yet compatible with scenarios using bounding_boxes."
        )

    for i, index in enumerate(task_indexes):
        taskset = base_scenario[index]
        all_task_indexes = np.arange(len(taskset))
        x, y, t = taskset.get_raw_samples(all_task_indexes)
        # Relabel the task ids to follow the new task order.
        t = np.ones(len(y)) * i
        if new_x is None:
            new_x = x
            new_y = y
            new_t = t
        else:
            new_x = np.concatenate([new_x, x], axis=0)
            new_y = np.concatenate([new_y, y], axis=0)
            new_t = np.concatenate([new_t, t], axis=0)

    dataset = InMemoryDataset(new_x, new_y, new_t,
                              data_type=base_scenario.cl_dataset.data_type)
    return ContinualScenario(dataset)

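# A usage sketch for create_subscenario (this test is illustrative and was not
# in the original file): reorder a three-task slice of a ten-class scenario so
# the last selected task comes first. It relies on the gen_data sketch above
# and on ClassIncremental's default ascending class order.
def test_create_subscenario_reorder_sketch():
    train, _ = gen_data()
    scenario = ClassIncremental(InMemoryDataset(*train), increment=1)
    reordered = create_subscenario(scenario, np.array([2, 0, 1]))
    assert reordered.nb_tasks == 3
    # Task 0 of the reordered scenario now holds the data of original task 2.
    assert set(np.unique(reordered[0]._y)) == set(np.unique(scenario[2]._y))
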
def test_increments(increment, initial_increment, nb_tasks):
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=increment, initial_increment=initial_increment)

    assert scenario.nb_tasks == nb_tasks

    seen_tasks = 0
    for task_id, taskset in enumerate(scenario):
        seen_tasks += 1

        if isinstance(increment, list):
            max_class = sum(increment[:task_id + 1])
            min_class = sum(increment[:task_id])
        elif initial_increment:
            max_class = initial_increment + increment * task_id
            min_class = initial_increment + increment * (task_id - 1) if task_id > 0 else 0
        else:
            max_class = increment * (task_id + 1)
            min_class = increment * task_id

        for _ in DataLoader(taskset):
            pass

        assert np.max(taskset._y) == max_class - 1
        assert np.min(taskset._y) == min_class

    assert seen_tasks == nb_tasks

def test_instance_default_nb_tasks(numpy_data_per_task, nb_tasks, nb_tasks_gt, catch):
    """Test the InstanceIncremental loader when the dataset provides a default
    number of tasks."""
    train, test = numpy_data_per_task
    x_train, y_train, t_train = train
    x_test, y_test, t_test = test

    dummy = InMemoryDataset(x_train, y_train, t_train)

    has_raised = False
    try:
        scenario = InstanceIncremental(dummy, nb_tasks=nb_tasks)
    except Exception:
        has_raised = True

    if catch:
        assert has_raised
        return
    assert not has_raised

    nb_classes = scenario.nb_classes
    assert len(scenario) == nb_tasks_gt
    for task_id, train_dataset in enumerate(scenario):
        assert nb_classes == len(np.unique(train_dataset._y))
        # The fixture encodes the task id in the pixel values.
        unique_pixels = np.unique(train_dataset._x)
        assert len(unique_pixels) == 1 and unique_pixels[0] == float(task_id)

def test_encode_scenario_inference_fct():
    filename_h5 = "test_encode_scenario.hdf5"
    if os.path.exists(filename_h5):
        os.remove(filename_h5)

    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)

    model = nn.Linear(32 * 32 * 3, 50)
    inference_fct = lambda model, x: model(x.view(-1, 32 * 32 * 3))

    encoded_scenario = encode_scenario(model=model,
                                       scenario=scenario,
                                       batch_size=64,
                                       filename=filename_h5,
                                       inference_fct=inference_fct)

    assert scenario.nb_tasks == encoded_scenario.nb_tasks
    assert len(scenario[0]) == len(encoded_scenario[0])
    # Each encoded sample is the model's 50-dimensional output.
    assert encoded_scenario[0][0][0].shape[0] == 50

    os.remove(filename_h5)

def test_encode_scenario():
    filename_h5 = "test_encode_scenario.hdf5"
    if os.path.exists(filename_h5):
        os.remove(filename_h5)

    if torch.cuda.is_available():
        inference_fct = (lambda model, x: model.to(torch.device('cuda:0'))
                         (x.to(torch.device('cuda:0'))))
    else:
        inference_fct = (lambda model, x: model(x))

    train, _ = gen_data()
    x, y, t = train
    x = x.reshape(-1, 32 * 32 * 3)
    dummy = InMemoryDataset(x, y, t)
    scenario = ClassIncremental(dummy, increment=1)

    model = nn.Sequential(nn.Flatten(), nn.Linear(32 * 32 * 3, 50))

    encoded_scenario = encode_scenario(model=model,
                                       scenario=scenario,
                                       batch_size=64,
                                       filename=filename_h5,
                                       inference_fct=inference_fct)

    assert scenario.nb_tasks == encoded_scenario.nb_tasks
    assert len(scenario[0]) == len(encoded_scenario[0])

    os.remove(filename_h5)

def create_dense_dataset(tmpdir, png=False, train=False):
    prefix = "dense"
    nb_samples = 20

    x = np.random.randint(0, 255, (nb_samples, 3, 3, 3), dtype=np.uint8)
    # Segmentation maps with a few fixed class ids (255 is conventionally the
    # ignore label).
    y = np.zeros((nb_samples, 3, 3), dtype=np.uint8)
    y[:, 0, 0] = 255
    y[:, 0, 1] = 0
    y[:, 0, 2] = 1
    y[:, 1, 0] = 2
    y[:, 2, 0] = 3
    y[:, 1, 1] = 4

    x_paths, y_paths = [], []
    for i in range(nb_samples):
        if png:
            x_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.png"))
        else:
            x_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.jpg"))
        y_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.png"))
        Image.fromarray(x[i]).save(x_paths[-1])
        Image.fromarray(y[i]).save(y_paths[-1])

    return InMemoryDataset(
        np.array(x_paths), np.array(y_paths),
        data_type=TaskType.SEGMENTATION, train=train
    )

def create_dataset(tmpdir, prefix, png=False, train=True):
    nb_samples = 20

    x = np.random.randint(0, 255, (nb_samples, 2, 2, 3), dtype=np.uint8)
    y = np.zeros((nb_samples, 2, 2), dtype=np.uint8)
    y[0:15, 0, 0] = 255
    y[0:10, 0, 1] = 1
    y[4:10, 1, 0] = 2
    y[5:20, 0, 1] = 3
    y[15:20, 1, 1] = 4

    x_paths, y_paths = [], []
    for i in range(nb_samples):
        if png:
            x_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.png"))
        else:
            x_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.jpg"))
        y_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.png"))
        Image.fromarray(x[i]).save(x_paths[-1])
        Image.fromarray(y[i]).save(y_paths[-1])

    return InMemoryDataset(
        np.array(x_paths), np.array(y_paths),
        data_type=TaskType.SEGMENTATION, train=train
    )

def test_init_fail2(numpy_data):
    train = numpy_data
    dummy = InMemoryDataset(*train)

    # No transformations are given.
    with pytest.raises(TypeError):
        TransformationIncremental(cl_dataset=dummy)

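# gen_string() was not defined in this excerpt. A minimal sketch, assuming
# fake image paths with NB_CLASSES classes; the paths are never opened because
# the test below only inspects task counts.
def gen_string():
    y_train = np.repeat(np.arange(NB_CLASSES), 2)
    x_train = np.array([f"path_{i}.png" for i in range(len(y_train))])
    return x_train, y_train
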
def test_slicing_list_path_array(list_tasks):
    x_train, y_train = gen_string()
    dummy = InMemoryDataset(x_train, y_train, data_type=TaskType.IMAGE_PATH)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, list_tasks)
    assert subscenario.nb_tasks == len(list_tasks), \
        f"{subscenario.nb_tasks} - vs - {len(list_tasks)}"

def test_scenario_remapping():
    list_tasks = np.arange(9, -1, -1)  # tasks in reverse order
    (x_train, y_train, t_train), _ = gen_data()
    dummy = InMemoryDataset(x_train, y_train, t_train, data_type=TaskType.IMAGE_PATH)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, list_tasks)

    mapping = get_scenario_remapping(subscenario)

    np_classes = np.zeros(0)
    np_classes_remapped = np.zeros(0).astype(int)
    for taskset in subscenario:
        np_classes = np.concatenate([np_classes, taskset.get_classes()], axis=0)
        np_classes_remapped = np.concatenate([
            np_classes_remapped,
            remap_class_vector(taskset.get_classes(), mapping)[0]
        ], axis=0)

    nb_classes_seen = len(np_classes)
    # Remapped classes follow the order of discovery; the raw classes do not.
    assert np.equal(np_classes_remapped, np.arange(nb_classes_seen)).all()
    assert not np.equal(np_classes, np.arange(nb_classes_seen)).all()

def test_slicing_nc_no_end(start_index, classes):
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=2)
    taskset = scenario[start_index:]
    targets = np.sort(np.unique(taskset._y))
    assert len(targets) == len(classes)
    assert (targets == np.array(classes)).all(), (targets, classes)

def test_slicing_nc(index, classes):
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=2)
    taskset = scenario[index]
    targets = np.sort(np.unique(taskset._y))
    assert len(targets) == len(classes)
    assert (targets == np.array(classes)).all(), (targets, classes)

def test_task_order_generator_nb_tasks(nb_tasks):
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    scenario_generator = TaskOrderGenerator(scenario)
    sample_scenario = scenario_generator.sample(nb_tasks=nb_tasks)
    assert sample_scenario.nb_tasks == nb_tasks

def fake_data():
    x_train = np.random.randint(0, 255, size=(20, 32, 32, 3))
    y_train = []
    for i in range(NB_CLASSES):
        y_train.append(np.ones(2) * i)
    y_train = np.concatenate(y_train)
    return InMemoryDataset(x_train, y_train)

def test_split_train_val(val_split):
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=5)
    for taskset in scenario:
        train_taskset, val_taskset = split_train_val(taskset, val_split=val_split)
        assert int(val_split * len(taskset)) == len(val_taskset)
        assert len(val_taskset) + len(train_taskset) == len(taskset)

def dataset():
    x = np.random.randint(0, 255, (100, 4, 4, 3), dtype=np.uint8)
    y = np.random.randint(0, 3, (100,), dtype=np.int16)
    t = np.ones_like(y)
    t[:30] = 0
    t[30:60] = 1
    t[60:] = 2
    return InMemoryDataset(x, y, t)

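# gen_tensor_data() was not defined in this excerpt. A minimal sketch,
# assuming small torch tensors with NB_CLASSES classes, matching the
# TaskType.TENSOR tests below.
def gen_tensor_data():
    x_train = torch.randn(2 * NB_CLASSES, 8)
    y_train = torch.arange(NB_CLASSES).repeat_interleave(2)  # [0, 0, 1, 1, ...]
    x_test = torch.randn(2 * NB_CLASSES, 8)
    y_test = torch.arange(NB_CLASSES).repeat_interleave(2)
    return (x_train, y_train), (x_test, y_test)
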
def test_tensor_type(increment, nb_tasks):
    train, _ = gen_tensor_data()
    dummy = InMemoryDataset(*train, data_type=TaskType.TENSOR)
    scenario = ClassIncremental(dummy, increment=increment)
    taskset = scenario[0]
    for x, y, t in DataLoader(taskset):
        pass
    assert scenario.nb_tasks == nb_tasks

def test_class_order_generator_seed(seed):
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    scenario_generator = ClassOrderGenerator(scenario)
    sample_scenario = scenario_generator.sample(seed)
    class_order = scenario_generator.get_class_order(seed)
    assert (np.array(class_order) == np.array(sample_scenario.class_order)).all()

def test_tensor_type_get_samples(increment, nb_tasks):
    train, _ = gen_tensor_data()
    dummy = InMemoryDataset(*train, data_type=TaskType.TENSOR)
    scenario = ClassIncremental(dummy, increment=increment)
    taskset = scenario[0]
    for x, y, t in DataLoader(taskset):
        pass
    x, y, t = taskset.get_random_samples(5)

def test_class_order_generator():
    train, _ = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    scenario_generator = ClassOrderGenerator(scenario)
    sample_scenario = scenario_generator.sample()
    assert sample_scenario.nb_tasks == scenario.nb_tasks
    assert sample_scenario.nb_classes == scenario.nb_classes
    assert (sample_scenario.classes == scenario.classes).all()