Example #1
def test_slice(
        dataset,
        keep_classes, discard_classes,
        keep_tasks, discard_tasks,
        error,
        ids
    ):

    dataset = InMemoryDataset(*dataset)

    if error:
        with pytest.raises(Exception):
            sliced_dataset = dataset.slice(
                keep_classes, discard_classes,
                keep_tasks, discard_tasks
            )
        return
    else:
        sliced_dataset = dataset.slice(
            keep_classes, discard_classes,
            keep_tasks, discard_tasks
        )

    x, _, _ = sliced_dataset.get_data()

    assert (np.unique(x) == np.array(ids)).all(), (np.unique(x), ids)
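The bare fixture arguments above come from pytest parametrization that was stripped from this snippet; a minimal sketch of what such a parametrization could look like (the concrete values are illustrative, not the suite's real cases):

# Hypothetical parametrization for test_slice; values are illustrative only.
@pytest.mark.parametrize(
    "dataset,keep_classes,discard_classes,keep_tasks,discard_tasks,error,ids",
    [
        # keep classes {0, 1}: samples 0..3 survive, identified by their x values
        ((np.arange(10), np.repeat(np.arange(5), 2)), [0, 1], None, None, None, False, [0, 1, 2, 3]),
        # keeping and discarding the same class is contradictory -> expect an error
        ((np.arange(10), np.repeat(np.arange(5), 2)), [0], [0], None, None, True, None),
    ],
)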
Example #2
def test_continuum_to_pytorch_dataset():
    x_train = np.random.randint(0, 255, size=(20, 32, 32, 3))
    y_train = []
    for i in range(10):
        y_train.append(np.ones(2) * i)
    y_train = np.concatenate(y_train)

    continuum_dataset = InMemoryDataset(x_train, y_train)
    task_set = continuum_dataset.to_taskset()

    loader = DataLoader(task_set, batch_size=32)

    # iterate once through the loader, counting samples to check it works
    nb_samples = 0
    for x, y, _ in loader:
        nb_samples += len(y)
    assert nb_samples == len(y_train)
Example #3
def test_slicing_list(list_tasks):
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    taskset = scenario[list_tasks]
    targets = np.sort(np.unique(taskset._y))
    assert len(targets) == len(list_tasks), f"{len(targets)} - vs - {len(list_tasks)}"
Example #4
def test_HashedScenario_save_indexes(tmpdir, hash_name):
    num_tasks = 2
    x, y = numpy_data()
    dataset = InMemoryDataset(x, y, None, data_type=TaskType.IMAGE_ARRAY)

    filename_indexes = os.path.join(tmpdir, f"{hash_name}.npy")
    if os.path.exists(filename_indexes):
        os.remove(filename_indexes)

    assert not os.path.exists(filename_indexes), \
        f"{filename_indexes} should have been deleted."

    # test save the indexes array
    scenario = HashedScenario(cl_dataset=dataset,
                              hash_name=hash_name,
                              nb_tasks=num_tasks,
                              filename_hash_indexes=filename_indexes)

    # test load the indexes array
    scenario = HashedScenario(cl_dataset=dataset,
                              hash_name=hash_name,
                              nb_tasks=num_tasks,
                              filename_hash_indexes=filename_indexes)

    # delete test indexes
    os.remove(filename_indexes)
Example #5
def test_get_task_transformation(numpy_data):
    x, y = numpy_data
    dummy = InMemoryDataset(x, y)

    Trsf_0 = []
    Trsf_1 = [transforms.RandomAffine(degrees=[40, 50])]
    Trsf_2 = [transforms.RandomAffine(degrees=[85, 95])]

    dummy_transf = [Trsf_0, Trsf_1, Trsf_2]

    base_transformations = [
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ]

    scenario = TransformationIncremental(
        cl_dataset=dummy,
        incremental_transformations=dummy_transf,
        base_transformations=base_transformations)

    for task_id, taskset in enumerate(scenario):
        # first task specific transformation then base_transformation
        tot_transf_task = transforms.Compose(dummy_transf[task_id] +
                                             base_transformations)

        # we compare the str representation of the composition
        assert tot_transf_task.__repr__() == scenario.get_task_transformation(
            task_id).__repr__()
Example #6
def test_slicing_list(list_tasks):
    train = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, list_tasks)
    assert subscenario.nb_tasks == len(list_tasks), \
        f"{len(subscenario)} - vs - {len(list_tasks)}"
Example #7
def scenario():
    x = np.random.randn(100, 2)
    y = np.concatenate([np.ones(10) * i for i in range(10)])
    t = None

    dataset = InMemoryDataset(x, y, t)
    return ClassIncremental(dataset, increment=2)
Example #8
def test_init_shared_label_space(numpy_data, shared_label_space):
    x, y = numpy_data
    dummy = InMemoryDataset(x, y)

    Trsf_0 = []
    Trsf_1 = [transforms.RandomAffine(degrees=[40, 50])]
    Trsf_2 = [transforms.RandomAffine(degrees=[85, 95])]

    dummy_transf = [Trsf_0, Trsf_1, Trsf_2]

    scenario = TransformationIncremental(
        cl_dataset=dummy,
        incremental_transformations=dummy_transf,
        shared_label_space=shared_label_space
    )

    for task_id, taskset in enumerate(scenario):
        assert taskset.nb_classes == NB_CLASSES
        classes = taskset.get_classes()
        if shared_label_space:
            assert classes.max() == NB_CLASSES - 1
            assert classes.min() == 0
        else:
            assert classes.max() == (NB_CLASSES * (task_id + 1)) - 1
            assert classes.min() == (NB_CLASSES * task_id)
Example #9
    def __init__(
            self,
            cl_dataset: _ContinuumDataset,
            hash_name,
            nb_tasks=None,
            transformations: Union[List[Callable], List[List[Callable]]] = None,
            filename_hash_indexes: Optional[str] = None,
            split_task="balanced"
    ) -> None:
        self.hash_name = hash_name
        self.split_task = split_task
        self._nb_tasks = nb_tasks

        available_hash_names = ["AverageHash", "Phash", "PhashSimple", "DhashH",
                                "DhashV", "Whash", "ColorHash"]  # "CropResistantHash" is not supported yet
        if self.hash_name not in available_hash_names:
            raise AssertionError(f"{self.hash_name} is not an available hash_name.")
        if self.split_task not in ["balanced", "auto"]:
            raise AssertionError(f"{self.split_task} is not an available split_task value.")
        if split_task == "balanced" and nb_tasks is None:
            raise AssertionError(f"split_task is '{self.split_task}', so nb_tasks must be set.")

        self.data_type = cl_dataset.data_type
        self.filename_hash_indexes = filename_hash_indexes

        # "CropResistantHash" does not work yet
        # if self.hash_name == "CropResistantHash":
        #     # auto (kmeans) does not work with hash format of CropResistantHash
        #     self.split_task = "balanced"

        x, y, t = self.generate_task_ids(cl_dataset)
        cl_dataset = InMemoryDataset(x, y, t, data_type=self.data_type)
        super().__init__(cl_dataset=cl_dataset, transformations=transformations)
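A hedged usage sketch for this constructor, mirroring Example #4 (the array shapes and the chosen hash are illustrative):

# Sketch: build a HashedScenario over random images and iterate its tasks.
x = np.random.randint(0, 255, (100, 32, 32, 3), dtype=np.uint8)
y = np.random.randint(0, 10, (100,))
scenario = HashedScenario(
    cl_dataset=InMemoryDataset(x, y, None, data_type=TaskType.IMAGE_ARRAY),
    hash_name="Whash",  # any of the supported hash names listed above
    nb_tasks=2,         # required because split_task defaults to "balanced"
)
for task_id, taskset in enumerate(scenario):
    pass  # tasks group images by hash similarity, not by class label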
Example #10
def create_subscenario(base_scenario, task_indexes):
    """
    In this function we want to create a subscenario from the different tasks, either by subsampling tasks or reodering
    or both.
    """

    new_x, new_y, new_t = None, None, None
    if base_scenario.cl_dataset.bounding_boxes is not None:
        raise ValueError(
            "the function create_subscenario is not compatible with scenario with bounding_boxes yet."
        )

    for i, index in enumerate(task_indexes):
        taskset = base_scenario[index]
        all_task_indexes = np.arange(len(taskset))
        x, y, t = taskset.get_raw_samples(all_task_indexes)
        t = np.ones(len(y)) * i
        if new_x is None:
            new_x = x
            new_y = y
            new_t = t
        else:
            new_x = np.concatenate([new_x, x], axis=0)
            new_y = np.concatenate([new_y, y], axis=0)
            new_t = np.concatenate([new_t, t], axis=0)
    dataset = InMemoryDataset(new_x,
                              new_y,
                              new_t,
                              data_type=base_scenario.cl_dataset.data_type)

    return ContinualScenario(dataset)
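An illustrative follow-up (a sketch, assuming `scenario` is a 10-task ClassIncremental as in the surrounding tests): the helper can reorder tasks as well as subsample them.

# Sketch: rebuild the scenario with its tasks in reverse order.
reversed_scenario = create_subscenario(scenario, np.arange(9, -1, -1))
assert reversed_scenario.nb_tasks == scenario.nb_tasks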
Example #11
def test_increments(increment, initial_increment, nb_tasks):
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=increment, initial_increment=initial_increment)

    assert scenario.nb_tasks == nb_tasks
    seen_tasks = 0

    for task_id, taskset in enumerate(scenario):
        seen_tasks += 1

        if isinstance(increment, list):
            max_class = sum(increment[:task_id + 1])
            min_class = sum(increment[:task_id])
        elif initial_increment:
            max_class = initial_increment + increment * task_id
            min_class = initial_increment + increment * (task_id - 1) if task_id > 0 else 0
        else:
            max_class = increment * (task_id + 1)
            min_class = increment * task_id

        for _ in DataLoader(taskset):
            pass

        assert np.max(taskset._y) == max_class - 1
        assert np.min(taskset._y) == min_class
    assert seen_tasks == nb_tasks
Example #12
def test_instance_default_nb_tasks(numpy_data_per_task, nb_tasks, nb_tasks_gt, catch):
    """Test the InstanceIncremental loader when the dataset does provide
    a default number of tasks."""
    train, test = numpy_data_per_task

    x_train, y_train, t_train = train
    x_test, y_test, t_test = test

    dummy = InMemoryDataset(x_train, y_train, t_=t_train)

    has_raised = False
    try:
        scenario = InstanceIncremental(dummy, nb_tasks=nb_tasks)
    except Exception:
        has_raised = True

    if catch:
        assert has_raised
        return
    else:
        assert not has_raised

    nb_classes = scenario.nb_classes

    assert len(scenario) == nb_tasks_gt
    for task_id, train_dataset in enumerate(scenario):
        assert nb_classes == len(np.unique(train_dataset._y))

        unique_pixels = np.unique(train_dataset._x)
        assert len(unique_pixels) == 1 and unique_pixels[0] == float(task_id)
Example #13
def test_encode_scenario_inference_fct():
    filename_h5 = "test_encode_scenario.hdf5"
    if os.path.exists(filename_h5):
        os.remove(filename_h5)

    train = gen_data()

    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)

    model = nn.Linear(32 * 32 * 3, 50)
    inference_fct = lambda model, x: model(x.view(-1, 32 * 32 * 3))

    encoded_scenario = encode_scenario(model=model,
                                       scenario=scenario,
                                       batch_size=64,
                                       filename=filename_h5,
                                       inference_fct=inference_fct)

    assert scenario.nb_tasks == encoded_scenario.nb_tasks
    assert len(scenario[0]) == len(encoded_scenario[0])

    assert encoded_scenario[0][0][0].shape[0] == 50

    os.remove(filename_h5)
Example #14
def test_encode_scenario():
    filename_h5 = "test_encode_scenario.hdf5"
    if os.path.exists(filename_h5):
        os.remove(filename_h5)

    if torch.cuda.is_available():
        device = torch.device('cuda:0')
        inference_fct = lambda model, x: model.to(device)(x.to(device))
    else:
        inference_fct = lambda model, x: model(x)

    train = gen_data()
    x, y, t = train
    x = x.reshape(-1, 32 * 32 * 3)

    dummy = InMemoryDataset(x, y, t)
    scenario = ClassIncremental(dummy, increment=1)

    model = nn.Sequential(nn.Flatten(), nn.Linear(32 * 32 * 3, 50))
    encoded_scenario = encode_scenario(model=model,
                                       scenario=scenario,
                                       batch_size=64,
                                       filename=filename_h5,
                                       inference_fct=inference_fct)

    assert scenario.nb_tasks == encoded_scenario.nb_tasks
    assert len(scenario[0]) == len(encoded_scenario[0])

    os.remove(filename_h5)
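Both tests above only check shapes and lengths; as a sketch of how the encoded scenario could be consumed before the hdf5 file is removed (assuming the 50-d encoder used in these tests):

# Sketch: encoded task sets yield latent vectors instead of raw images.
for taskset in encoded_scenario:
    for features, y, t in DataLoader(taskset, batch_size=8):
        assert features.shape[-1] == 50  # latent dimension of the encoder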
Example #15
def create_dense_dataset(tmpdir, png=False, train=False):
    prefix = "dense"
    nb_samples = 20

    x = np.random.randint(0, 255, (nb_samples, 3, 3, 3), dtype=np.uint8)
    y = np.zeros((nb_samples, 3, 3), dtype=np.uint8)
    y[:, 0, 0] = 255
    y[:, 0, 1] = 0
    y[:, 0, 2] = 1
    y[:, 1, 0] = 2
    y[:, 2, 0] = 3
    y[:, 1, 1] = 4

    x_paths, y_paths = [], []
    for i in range(nb_samples):
        if png:
            x_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.png"))
        else:
            x_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.jpg"))
        y_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.png"))

        Image.fromarray(x[i]).save(x_paths[-1])
        Image.fromarray(y[i]).save(y_paths[-1])

    return InMemoryDataset(np.array(x_paths),
                           np.array(y_paths),
                           data_type="segmentation",
                           train=train)
Example #16
def create_dataset(tmpdir, prefix, png=False, train=True):
    nb_samples = 20

    x = np.random.randint(0, 255, (nb_samples, 2, 2, 3), dtype=np.uint8)
    y = np.zeros((nb_samples, 2, 2), dtype=np.uint8)
    y[0:15, 0, 0] = 255

    y[0:10, 0, 1] = 1
    y[4:10, 1, 0] = 2
    y[5:20, 0, 1] = 3
    y[15:20, 1, 1] = 4

    x_paths, y_paths = [], []
    for i in range(nb_samples):
        if png:
            x_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.png"))
        else:
            x_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.jpg"))
        y_paths.append(os.path.join(tmpdir, f"{prefix}_{i}.png"))

        Image.fromarray(x[i]).save(x_paths[-1])
        Image.fromarray(y[i]).save(y_paths[-1])

    return InMemoryDataset(
        np.array(x_paths), np.array(y_paths),
        data_type=TaskType.SEGMENTATION,
        train=train
    )
Example #17
def test_init_fail2(numpy_data):
    train = numpy_data
    dummy = InMemoryDataset(*train)

    # No transformation is set
    with pytest.raises(TypeError):
        scenario = TransformationIncremental(cl_dataset=dummy)
Example #18
def test_slicing_list_path_array(list_tasks):
    x_train, y_train = gen_string()
    dummy = InMemoryDataset(x_train, y_train, data_type=TaskType.IMAGE_PATH)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, list_tasks)
    assert subscenario.nb_tasks == len(list_tasks), \
        f"{len(subscenario)} - vs - {len(list_tasks)}"
Example #19
def test_scenario_remapping():
    list_tasks = np.arange(9, -1, -1)
    x_train, y_train, t_train = gen_data()
    dummy = InMemoryDataset(x_train,
                            y_train,
                            t_train,
                            data_type=TaskType.IMAGE_PATH)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, list_tasks)

    mapping = get_scenario_remapping(subscenario)
    np_classes = np.zeros(0)
    np_classes_remapped = np.zeros(0).astype(int)
    for taskset in subscenario:
        np_classes = np.concatenate(
            [np_classes, taskset.get_classes()], axis=0)
        np_classes_remapped = np.concatenate(
            [np_classes_remapped,
             remap_class_vector(taskset.get_classes(), mapping)[0]],
            axis=0)
    nb_classes_seen = len(np_classes)

    assert np.equal(np_classes_remapped, np.arange(nb_classes_seen)).all()
    assert not np.equal(np_classes, np.arange(nb_classes_seen)).all()
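For reference, a hedged sketch of applying the computed mapping to a single class vector, calling remap_class_vector exactly as the test above does:

# Sketch: after reversal the first task holds class 9, which remaps to 0.
raw_classes = subscenario[0].get_classes()
remapped = remap_class_vector(raw_classes, mapping)[0]
assert remapped[0] == 0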
Example #20
def test_slicing_nc_no_end(start_index, classes):
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=2)
    taskset = scenario[start_index:]
    targets = np.sort(np.unique(taskset._y))
    assert len(targets) == len(classes)
    assert (targets == np.array(classes)).all(), (targets, classes)
Example #21
def test_slicing_nc(index, classes):
    train, test = gen_data()
    dummy = InMemoryDataset(*train, *test)
    clloader = ClassIncremental(dummy, increment=2)
    dataset = clloader[index]
    targets = np.sort(np.unique(dataset.y))
    assert len(targets) == len(classes)
    assert (targets == np.array(classes)).all(), (targets, classes)
Example #22
def test_task_order_generator_nb_tasks(nb_tasks):
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    scenario_generator = TaskOrderGenerator(scenario)
    sample_scenario = scenario_generator.sample(nb_tasks=nb_tasks)

    assert sample_scenario.nb_tasks == nb_tasks
Example #23
def fake_data():
    x_train = np.random.randint(0, 255, size=(20, 32, 32, 3))
    y_train = []
    for i in range(NB_CLASSES):
        y_train.append(np.ones(2) * i)
    y_train = np.concatenate(y_train)

    return InMemoryDataset(x_train, y_train)
Example #24
def test_split_train_val(val_split):
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=5)

    for taskset in scenario:
        train_taskset, val_taskset = split_train_val(taskset, val_split=val_split)
        assert int(val_split * len(taskset)) == len(val_taskset)
        assert len(val_taskset) + len(train_taskset) == len(taskset)
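In practice the two task sets returned by split_train_val are wrapped in loaders; a minimal sketch:

# Sketch: standard PyTorch loaders over the train/val split.
train_loader = DataLoader(train_taskset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_taskset, batch_size=32)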
Example #25
def test_split_train_val(val_split):
    train, test = gen_data()
    dummy = InMemoryDataset(*train, *test)
    clloader = ClassIncremental(dummy, increment=5)

    for dataset in clloader:
        train_dataset, val_dataset = split_train_val(dataset, val_split=val_split)
        assert int(val_split * len(dataset)) == len(val_dataset)
        assert len(val_dataset) + len(train_dataset) == len(dataset)
Example #26
def dataset():
    x = np.random.randint(0, 255, (100, 4, 4, 3), dtype=np.uint8)
    y = np.random.randint(0, 3, (100,), dtype=np.int16)
    t = np.ones_like(y)

    t[:30] = 0
    t[30:60] = 1
    t[60:] = 2

    return InMemoryDataset(x, y, t)
Example #27
def test_tensor_type(increment, nb_tasks):
    train, test = gen_tensor_data()
    dummy = InMemoryDataset(*train, data_type="tensor")
    scenario = ClassIncremental(dummy, increment=increment)

    taskset = scenario[0]
    for x, y, t in DataLoader(taskset):
        continue

    assert scenario.nb_tasks == nb_tasks
Example #28
def test_class_order_generator(seed):
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)

    scenario_generator = ClassOrderGenerator(scenario)
    sample_scenario = scenario_generator.sample(seed)
    class_order = scenario_generator.get_class_order(seed)

    assert (np.array(class_order) == np.array(sample_scenario.class_order)).all()
Example #29
def test_tensor_type_get_samples(increment, nb_tasks):
    train, test = gen_tensor_data()
    dummy = InMemoryDataset(*train, data_type="tensor")
    scenario = ClassIncremental(dummy, increment=increment)

    taskset = scenario[0]
    for x, y, t in DataLoader(taskset):
        continue

    x, y, t = taskset.get_random_samples(5)
Example #30
def test_class_order_generator():
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)

    scenario_generator = ClassOrderGenerator(scenario)
    sample_scenario = scenario_generator.sample()

    assert sample_scenario.nb_tasks == scenario.nb_tasks
    assert sample_scenario.nb_classes == scenario.nb_classes
    assert (sample_scenario.classes == scenario.classes).all()