Example no. 1
import numpy as np

from continuum.datasets import InMemoryDataset
from continuum.scenarios import ContinualScenario


def create_subscenario(base_scenario, task_indexes):
    """
    In this function we want to create a subscenario from the different tasks, either by subsampling tasks or reodering
    or both.
    """

    new_x, new_y, new_t = None, None, None
    if base_scenario.cl_dataset.bounding_boxes is not None:
        raise ValueError(
            "the function create_subscenario is not compatible with scenario with bounding_boxes yet."
        )

    for i, index in enumerate(task_indexes):
        taskset = base_scenario[index]
        all_task_indexes = np.arange(len(taskset))
        x, y, t = taskset.get_raw_samples(all_task_indexes)
        t = np.ones(len(y)) * i  # relabel task ids to match the new task order
        if new_x is None:
            new_x = x
            new_y = y
            new_t = t
        else:
            new_x = np.concatenate([new_x, x], axis=0)
            new_y = np.concatenate([new_y, y], axis=0)
            new_t = np.concatenate([new_t, t], axis=0)
    dataset = InMemoryDataset(new_x,
                              new_y,
                              new_t,
                              data_type=base_scenario.cl_dataset.data_type)

    return ContinualScenario(dataset)
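For illustration, a minimal usage sketch (assuming scenario is a ContinualScenario built as in the tests below; reversed_order is a name introduced here, not part of the source):

# Hypothetical usage: reverse the task order of an existing scenario.
reversed_order = np.arange(scenario.nb_tasks)[::-1]
reversed_scenario = create_subscenario(scenario, reversed_order)
assert reversed_scenario.nb_tasks == scenario.nb_tasks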
Example no. 2
def test_h5dataset_add_data(data, tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    x_, y_, t_ = data
    h5dataset = H5Dataset(x_, y_, t_, data_path=filename_h5)
    h5dataset.add_data(x_, y_, t_)

    nb_task = len(np.unique(t_))
    scenario = ContinualScenario(h5dataset)

    assert scenario.nb_tasks == nb_task
Example no. 3
def test_scenario():
    x = np.ones((100, 4, 4, 3), dtype=np.uint8)
    y = np.arange(100) // 5

    nb_tasks = 10
    t = np.random.randint(nb_tasks, size=100)

    dummy = InMemoryDataset(x, y, t)
    scenario = ContinualScenario(dummy)

    assert scenario.nb_tasks == nb_tasks
Example no. 4
def test_bad_task_ids():
    x = np.ones((100, 4, 4, 3), dtype=np.uint8)
    y = np.arange(100) // 5
    nb_tasks = 10
    # test that a missing task id generates an error
    t = np.random.randint(10, size=100)
    t = t + np.ones(100)  # shift task ids from [0, 9] to [1, 10], so id 0 is missing

    dummy = InMemoryDataset(x, y, t)

    with pytest.raises(Exception):
        scenario = ContinualScenario(dummy)
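The error above comes from task ids that skip 0; a minimal sketch, assuming ContinualScenario expects contiguous 0-based task ids (the remapping below is illustrative, not part of the test suite):

# Remap arbitrary task ids to contiguous 0-based ids.
# np.unique(..., return_inverse=True) returns, for each element of t, the
# index of its value in the sorted unique array, i.e. ids in [0, nb_tasks - 1].
_, t_contiguous = np.unique(t, return_inverse=True)
dummy = InMemoryDataset(x, y, t_contiguous)
scenario = ContinualScenario(dummy)  # no longer raises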
Example no. 5
def test_scenario_CIFAR100_Scenarios():
    dataset = CIFAR100(DATA_PATH,
                       train=True,
                       labels_type="category",
                       task_labels="category")
    scenario = ContinualScenario(dataset)
    assert scenario.nb_classes == 20
    assert scenario.nb_tasks == 20

    dataset = CIFAR100(DATA_PATH,
                       train=True,
                       labels_type="category",
                       task_labels="class")
    scenario = ContinualScenario(dataset)
    assert scenario.nb_classes == 20
    assert scenario.nb_tasks == 100

    dataset = CIFAR100(DATA_PATH,
                       train=True,
                       labels_type="class",
                       task_labels="class")
    scenario = ContinualScenario(dataset)
    assert scenario.nb_classes == 100
    assert scenario.nb_tasks == 100

    dataset = CIFAR100(DATA_PATH,
                       train=True,
                       labels_type="class",
                       task_labels="category")
    scenario = ContinualScenario(dataset)
    assert scenario.nb_classes == 100
    assert scenario.nb_tasks == 20

    dataset = CIFAR100(DATA_PATH,
                       train=True,
                       labels_type="category",
                       task_labels="lifelong")
    scenario = ContinualScenario(dataset)
    assert scenario.nb_classes == 20
    assert scenario.nb_tasks == 5
Example no. 6
def test_h5dataset_ContinualScenario(data, tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    x_, y_, t_ = data
    h5dataset = H5Dataset(x_, y_, t_, data_path=filename_h5)

    nb_task = len(np.unique(t_))
    scenario = ContinualScenario(h5dataset)

    assert scenario.nb_tasks == nb_task

    data_indexes = np.where(t_ == 0)[0]
    assert len(data_indexes) == len(scenario[0])
Example no. 7
def test_h5dataset_loading(data, tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    x_, y_, t_ = data
    h5dataset = H5Dataset(x_, y_, t_, data_path=filename_h5)

    nb_task = len(np.unique(t_))
    scenario = ContinualScenario(h5dataset)

    for task_set in scenario:
        loader = DataLoader(task_set)
        for _ in loader:
            pass

    assert scenario.nb_tasks == nb_task
Example no. 8
def test_h5dataset_get_raw(data, tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    x_, y_, t_ = data
    h5dataset = H5Dataset(x_, y_, t_, data_path=filename_h5)

    nb_task = len(np.unique(t_))
    scenario = ContinualScenario(h5dataset)

    for task_set in scenario:
        indexes = np.random.randint(len(task_set), size=len(task_set) // 2)
        # np.ndarray.sort() sorts in place and returns None; np.sort returns
        # the sorted array itself
        _, _, _ = task_set.get_raw_samples(np.sort(indexes))
        # test with no indexes (fetches all samples)
        _, _, _ = task_set.get_raw_samples()

    assert scenario.nb_tasks == nb_task
Example no. 9
def test_create_subscenario_h5dataset(data, tmpdir):
    from continuum.scenarios import create_subscenario
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    x_, y_, t_ = data
    h5dataset = H5Dataset(x_, y_, t_, data_path=filename_h5)

    nb_task = len(np.unique(t_))
    scenario = ContinualScenario(h5dataset)

    sub_scenario = create_subscenario(scenario, np.arange(nb_task - 1))

    for task_set in sub_scenario:
        loader = DataLoader(task_set)
        for _ in loader:
            pass

    assert sub_scenario.nb_tasks == nb_task - 1
Example no. 10
def encode_scenario(scenario, model, batch_size, filename, inference_fct=None):
    """This function created an encoded scenario dataset and convert it into a ContinualScenario.

    :param model: model to encode the data.
    :param scenario: scenario to encode.
    :param batch_size: batch size to load data.
    :param filename: filename for the h5 dataset.
    :param inference_fct: A function that make possible to have a sophisticate way to get features.
    """

    if os.path.isfile(filename):
        raise ValueError(f"File name: {filename} already exists")

    print(f"Encoding {filename}.")
    encoded_dataset = encode_into_dataset(model, scenario, batch_size,
                                          filename, inference_fct)
    print(f"Encoding is done.")

    return ContinualScenario(encoded_dataset)
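A hedged usage sketch (the ResNet encoder and the pre-built scenario below are placeholders introduced here, not part of the source):

# Hypothetical usage: encode a scenario with a pretrained feature extractor.
import torch
import torchvision.models as models

encoder = models.resnet18(pretrained=True)
encoder.fc = torch.nn.Identity()  # drop the classification head, keep the features
encoded_scenario = encode_scenario(scenario, encoder, batch_size=128,
                                   filename="encoded_scenario.hdf5")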
Example no. 11
def test_h5dataset_split_train_test(data, tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    x_, y_, t_ = data
    h5dataset = H5Dataset(x_, y_, t_, data_path=filename_h5)

    nb_task = len(np.unique(t_))
    scenario = ContinualScenario(h5dataset)

    for task_set in scenario:
        task_set_tr, task_set_val = split_train_val(task_set)
        loader_tr = DataLoader(task_set_tr)
        for _ in loader_tr:
            pass
        loader_val = DataLoader(task_set_val)
        for _ in loader_val:
            pass

    assert scenario.nb_tasks == nb_task
Example no. 12
def test_on_array_dataset(tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_CIFAR100_h5.hdf5")

    cl_dataset = CIFAR100(data_path=DATA_PATH,
                          download=False,
                          train=True,
                          labels_type="category",
                          task_labels="lifelong")
    # in practice the dataset is built part by part to limit memory usage, but here we add everything at once
    x, y, t = cl_dataset.get_data()
    h5dataset = H5Dataset(x, y, t, data_path=filename_h5)

    scenario = ContinualScenario(h5dataset)

    for task_set in scenario:
        loader = DataLoader(task_set, batch_size=64)
        for x, y, t in loader:
            assert x.shape == torch.Size([64, 3, 32, 32])
            break

    assert scenario.nb_tasks == 5  # number of tasks in the CIFAR100 lifelong setting
Example no. 13
def test_h5dataset_reloading(data, tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    x_, y_, t_ = data
    # create dataset
    h5dataset = H5Dataset(x_, y_, t_, data_path=filename_h5)
    # destroy object
    del h5dataset

    # reload the dataset from disk
    h5dataset_reloaded = H5Dataset(x=None, y=None, t=None, data_path=filename_h5)

    nb_task = len(np.unique(t_))
    scenario = ContinualScenario(h5dataset_reloaded)

    for task_set in scenario:
        loader = DataLoader(task_set)
        for _ in loader:
            pass

    assert scenario.nb_tasks == nb_task
Example no. 14
def test_create_subscenario_suffle_h5dataset(data, tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    x_, y_, t_ = data
    h5dataset = H5Dataset(x_, y_, t_, data_path=filename_h5)

    nb_task = len(np.unique(t_))
    scenario = ContinualScenario(h5dataset)

    task_order = np.arange(nb_task)
    np.random.shuffle(task_order)

    sub_scenario = create_subscenario(scenario, task_order)

    for task_set in sub_scenario:
        loader = DataLoader(task_set)
        for _ in loader:
            pass

    assert sub_scenario.nb_tasks == nb_task
Example no. 15
def test_h5dataset_reloading_slow(tmpdir):
    filename_h5 = os.path.join(tmpdir, "test_h5.hdf5")

    nb_tasks = 5

    cl_dataset = CIFAR100(data_path=DATA_PATH,
                          download=False,
                          train=True,
                          labels_type="category",
                          task_labels="lifelong")
    x, y, t = cl_dataset.get_data()

    # create dataset
    h5dataset = H5Dataset(x, y, t, data_path=filename_h5)
    # destroy object
    del h5dataset

    # reload the dataset from disk
    h5dataset_reloaded = H5Dataset(x=None, y=None, t=None, data_path=filename_h5)

    scenario = ContinualScenario(h5dataset_reloaded)

    for task_set in scenario:
        loader = DataLoader(task_set)
        for _ in loader:
            pass

    assert scenario.nb_tasks == nb_tasks

    task_order = np.arange(nb_tasks)

    sub_scenario = create_subscenario(scenario, task_order[:-1])

    assert sub_scenario.nb_tasks == nb_tasks - 1

    np.random.shuffle(task_order)
    sub_scenario = create_subscenario(scenario, task_order)
    assert sub_scenario.nb_tasks == nb_tasks
Example no. 16
import numpy as np
import torch

from continuum.datasets import InMemoryDataset
from continuum.scenarios import ContinualScenario, OnlineFellowship
from continuum.tasks import TaskType


def create_subscenario(base_scenario, task_indexes):
    """
    In this function we want to create a subscenario from the different tasks, either by subsampling tasks or reodering
    or both.

    :param base_scenario: scenario from which the subscenario will be created
    :param task_indexes: array with new order of tasks
    :return: A train PyTorch's Datasets.
    """

    if torch.is_tensor(task_indexes):
        task_indexes = task_indexes.numpy()

    if base_scenario.transformations is not None and isinstance(
            base_scenario.transformations[0], list):
        transformations = [
            base_scenario.transformations[i] for i in task_indexes
        ]
    else:
        transformations = base_scenario.transformations
    sub_scenario = None

    if isinstance(base_scenario, OnlineFellowship):
        # We only need to change the order of base_scenario.cl_datasets
        new_cl_datasets = [base_scenario.cl_datasets[i] for i in task_indexes]
        sub_scenario = OnlineFellowship(
            new_cl_datasets,
            transformations=transformations,
            update_labels=base_scenario.update_labels)
    elif base_scenario.cl_dataset.data_type == TaskType.H5:
        list_taskset = [base_scenario[i] for i in task_indexes]
        sub_scenario = OnlineFellowship(list_taskset,
                                        transformations=transformations,
                                        update_labels=False)
    else:
        new_x, new_y, new_t = None, None, None
        if base_scenario.cl_dataset.bounding_boxes is not None:
            raise ValueError(
                "the function create_subscenario is not compatible with scenario with bounding_boxes yet."
            )
        for i, index in enumerate(task_indexes):
            taskset = base_scenario[index]
            all_task_indexes = np.arange(len(taskset))
            x, y, t = taskset.get_raw_samples(all_task_indexes)
            t = np.ones(len(y)) * i  # relabel task ids to match the new task order
            if new_x is None:
                new_x = x
                new_y = y
                new_t = t
            else:
                new_x = np.concatenate([new_x, x], axis=0)
                new_y = np.concatenate([new_y, y], axis=0)
                new_t = np.concatenate([new_t, t], axis=0)
        dataset = InMemoryDataset(new_x,
                                  new_y,
                                  new_t,
                                  data_type=base_scenario.cl_dataset.data_type)
        sub_scenario = ContinualScenario(dataset,
                                         transformations=transformations)

    return sub_scenario
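Note the three branches above: an OnlineFellowship scenario is rebuilt by reordering its cl_datasets, an H5 scenario is wrapped task by task in an OnlineFellowship so the data stays on disk, and only in-memory scenarios are concatenated into a new InMemoryDataset. Since task_indexes may also be a torch tensor, a minimal shuffle sketch (scenario here is a placeholder):

# Hypothetical usage: shuffle the task order, passing the permutation as a tensor.
shuffled = create_subscenario(scenario, torch.randperm(scenario.nb_tasks))
assert shuffled.nb_tasks == scenario.nb_tasks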