Exemple #1
0
def load_dataset_in_memory_and_resize(data_access, set, division, dataset_path, targets_path, tmp_size,
                                      final_size, batch_size):
    """
    Load a whole split into memory and resize its images when `tmp_size` differs from `final_size`.

    :param data_access: "in-memory" or "fuel"
    :param set: name of the split to load; forwarded to the dataset class and used in the timer labels
    :param division: forwarded as `division` to the dataset class
    :param dataset_path: first data argument of InMemoryDataset (not used when data_access == "fuel")
    :param targets_path: passed as `source_targets` to InMemoryDataset (not used when data_access == "fuel")
    :param tmp_size: size handed to FuelDataset; also compared with `final_size` to decide whether to resize
    :param final_size: indexed as [0], [1], [2] to shape the resized array
                       (presumably height, width, channels -- TODO confirm)
    :param batch_size: forwarded to FuelDataset (not used when data_access == "in-memory")
    :return: tuple(images, targets)
    :raises Exception: when data_access is neither "in-memory" nor "fuel"
    """
    # Reject unknown access modes before starting any timer or loading anything
    if data_access not in ("in-memory", "fuel"):
        raise Exception("Data access not available. Must be 'fuel' or 'in-memory'. Here : %s."%data_access)

    with timer("Loading %s data"%set):
        if data_access == "in-memory":
            loader = InMemoryDataset(set, dataset_path, source_targets=targets_path, division=division)
            # Copy the arrays so they stay usable once the dataset object is dropped
            draw_data, targets = np.copy(loader.dataset), np.copy(loader.targets)
        else:
            loader = FuelDataset(set, tmp_size, batch_size=batch_size, shuffle=False, division=division)
            draw_data, targets = loader.return_whole_dataset()
        del loader

    if tmp_size != final_size:
        # Resize every loaded image into a freshly allocated float32 array
        n_images = draw_data.shape[0]
        resized = np.zeros((n_images, final_size[0], final_size[1], final_size[2]), dtype="float32")
        target_hw = final_size[0:2]
        with timer("Resizing %s images"%set):
            for idx in range(n_images):
                resized[idx] = resize_pil(draw_data[idx], target_hw)
        del draw_data
        return resized, targets

    return draw_data, targets
def check_preprocessed_data(data_access, dataset, targets, batch_size, tmp_size, final_size, preprocessing_func,
                            preprocessing_args, n=10):
    """
    Preprocess a single batch, print its shape, dtype and processing time, and display up to `n` samples.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed as `source` to InMemoryDataset (not used when data_access == "fuel")
    :param targets: passed as `source_targets` to InMemoryDataset (not used when data_access == "fuel")
    :param batch_size: batch size requested from the dataset
    :param tmp_size: size handed to FuelDataset (not used when data_access == "in-memory")
    :param final_size: indexed as [0], [1], [2]; the processed batch is allocated as
                       (samples, final_size[2], final_size[0], final_size[1])
    :param preprocessing_func: called as preprocessing_func(sample, *preprocessing_args); its result is
                               transposed with (2, 0, 1) before being stored
    :param preprocessing_args: extra positional arguments for `preprocessing_func`
    :param n: maximum number of samples to display
    :raises Exception: when data_access is neither "in-memory" nor "fuel"
    """
    if data_access=="in-memory":
        train_dataset = InMemoryDataset("train", source=dataset, batch_size=batch_size, source_targets=targets)
    elif data_access=="fuel":
        # NOTE(review): the fuel branch reads the "test" set of the "leaderboard" division -- confirm intended
        train_dataset = FuelDataset("test", tmp_size, batch_size=batch_size, division="leaderboard", shuffle=False)
    else:
        raise Exception("Data access not available. Must be 'fuel' or 'in-memory'. Here : %s."%data_access)

    # Compute only one batch
    start=time.time()
    batch,batch_targets = train_dataset.get_batch()
    batch_targets = convert_labels(batch_targets)
    # Use the real batch length: the dataset may return fewer than batch_size samples
    n_samples = batch.shape[0]
    processed_batch = np.zeros((n_samples,final_size[2],final_size[0],final_size[1]),
                               dtype="float32")
    for k in range(n_samples):
        processed_batch[k] = preprocessing_func(batch[k], *preprocessing_args).transpose(2,0,1)
    end=time.time()

    # Single-string print calls produce the same output on Python 2 and Python 3
    print("Batch Shape =  %s with dtype = %s"%(processed_batch.shape, processed_batch.dtype))
    print("Targets Shape = %s with dtype = %s"%(batch_targets.shape, batch_targets.dtype))
    # Never try to display more samples than the batch holds
    for i in range(min(n, n_samples)):
        plt.figure(0)
        plt.gray()
        plt.clf()
        plt.title("(%d,%d)"%(batch_targets[i][0], batch_targets[i][1]))
        # Decide on the array that is actually displayed: axis 1 of processed_batch is final_size[2]
        if processed_batch.shape[1]==3:
            plt.imshow(processed_batch[i].transpose(1,2,0))
        else:
            plt.imshow(processed_batch[i,0])
        plt.show()
    print("Processing 1 batch took : %.5f"%(end-start))
Exemple #3
0
def check_preprocessed_data(data_access,
                            dataset,
                            targets,
                            batch_size,
                            tmp_size,
                            final_size,
                            preprocessing_func,
                            preprocessing_args,
                            n=10):
    """
    Preprocess a single batch, print its shape, dtype and processing time, and display up to `n` samples.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed as `source` to InMemoryDataset (not used when data_access == "fuel")
    :param targets: passed as `source_targets` to InMemoryDataset (not used when data_access == "fuel")
    :param batch_size: batch size requested from the dataset
    :param tmp_size: size handed to FuelDataset (not used when data_access == "in-memory")
    :param final_size: indexed as [0], [1], [2]; the processed batch is allocated as
                       (samples, final_size[2], final_size[0], final_size[1])
    :param preprocessing_func: called as preprocessing_func(sample, *preprocessing_args); its result is
                               transposed with (2, 0, 1) before being stored
    :param preprocessing_args: extra positional arguments for `preprocessing_func`
    :param n: maximum number of samples to display
    :raises Exception: when data_access is neither "in-memory" nor "fuel"
    """
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train",
                                        source=dataset,
                                        batch_size=batch_size,
                                        source_targets=targets)
    elif data_access == "fuel":
        # NOTE(review): the fuel branch reads the "test" set of the "leaderboard" division -- confirm intended
        train_dataset = FuelDataset("test",
                                    tmp_size,
                                    batch_size=batch_size,
                                    division="leaderboard",
                                    shuffle=False)
    else:
        raise Exception(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s."
            % data_access)

    # Compute only one batch
    start = time.time()
    batch, batch_targets = train_dataset.get_batch()
    batch_targets = convert_labels(batch_targets)
    # Use the real batch length: the dataset may return fewer than batch_size samples
    n_samples = batch.shape[0]
    processed_batch = np.zeros(
        (n_samples, final_size[2], final_size[0], final_size[1]),
        dtype="float32")
    for k in range(n_samples):
        processed_batch[k] = preprocessing_func(
            batch[k], *preprocessing_args).transpose(2, 0, 1)
    end = time.time()

    # Single-string print calls produce the same output on Python 2 and Python 3
    print("Batch Shape =  %s with dtype = %s" %
          (processed_batch.shape, processed_batch.dtype))
    print("Targets Shape = %s with dtype = %s" %
          (batch_targets.shape, batch_targets.dtype))
    # Never try to display more samples than the batch holds
    for i in range(min(n, n_samples)):
        plt.figure(0)
        plt.gray()
        plt.clf()
        plt.title("(%d,%d)" % (batch_targets[i][0], batch_targets[i][1]))
        # Decide on the array that is actually displayed: axis 1 of processed_batch is final_size[2]
        if processed_batch.shape[1] == 3:
            plt.imshow(processed_batch[i].transpose(1, 2, 0))
        else:
            plt.imshow(processed_batch[i, 0])
        plt.show()
    print("Processing 1 batch took : %.5f" % (end - start))
Exemple #4
0
def features_generator(data_access, dataset, targets, batch_size, tmp_size,
                       final_size, bagging_size, bagging_iterator,
                       multiple_input, preprocessing_func, preprocessing_args,
                       pretrained_model):
    """
    Endless generator yielding the predictions of `pretrained_model` on preprocessed batches.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed as `source` to InMemoryDataset (not used when data_access == "fuel")
    :param targets: passed as `source_targets` to InMemoryDataset (not used when data_access == "fuel")
    :param batch_size: batch size given to the dataset and to `get_next_batch`
    :param tmp_size: size handed to FuelDataset (not used when data_access == "in-memory")
    :param final_size: forwarded to `get_next_batch`
    :param bagging_size: passed as `bagging` to FuelDataset
    :param bagging_iterator: passed as `bagging_iterator` to FuelDataset
    :param multiple_input: when different from 1, the batch is repeated that many times in a list
                           before being given to the model
    :param preprocessing_func: forwarded to `get_next_batch`
    :param preprocessing_args: forwarded to `get_next_batch`
    :param pretrained_model: object whose `predict` method computes the features
    :return: yields tuple(features, labels)
    :raises Exception: on first iteration, when data_access is neither "in-memory" nor "fuel"
    """
    # Build the batch source for the requested access mode
    if data_access == "fuel":
        batch_source = FuelDataset("train",
                                   tmp_size,
                                   batch_size=batch_size,
                                   bagging=bagging_size,
                                   bagging_iterator=bagging_iterator)
    elif data_access == "in-memory":
        batch_source = InMemoryDataset("train",
                                       source=dataset,
                                       batch_size=batch_size,
                                       source_targets=targets)
    else:
        raise Exception(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s."
            % data_access)

    while True:
        processed_batch, labels = get_next_batch(batch_source, batch_size,
                                                 final_size,
                                                 preprocessing_func,
                                                 preprocessing_args)
        # A multi-input model receives the same batch once per input
        if multiple_input == 1:
            model_input = processed_batch
        else:
            model_input = [processed_batch for _ in range(multiple_input)]
        yield pretrained_model.predict(model_input), labels
Exemple #5
0
def multi_features_generator(data_access,
                             dataset,
                             targets,
                             batch_size,
                             tmp_size,
                             final_size,
                             bagging_size,
                             bagging_iterator,
                             multiple_input,
                             preprocessing_func,
                             preprocessing_args,
                             pretrained_models,
                             mode="concat"):
    """
    Endless generator yielding the features of several pretrained models for each preprocessed batch.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed as `source` to InMemoryDataset (not used when data_access == "fuel")
    :param targets: passed as `source_targets` to InMemoryDataset (not used when data_access == "fuel")
    :param batch_size: batch size given to the dataset and to `get_next_batch`
    :param tmp_size: size handed to FuelDataset (not used when data_access == "in-memory")
    :param final_size: forwarded to `get_next_batch`
    :param bagging_size: passed as `bagging` to FuelDataset
    :param bagging_iterator: passed as `bagging_iterator` to FuelDataset
    :param multiple_input: must be 1; any other value raises after the first batch is fetched
    :param preprocessing_func: forwarded to `get_next_batch`
    :param preprocessing_args: forwarded to `get_next_batch`
    :param pretrained_models: iterable of objects whose `predict` method computes the features
    :param mode: "concat" joins the per-model features along axis 1; any other value yields
                 the list of per-model features as is
    :return: yields tuple(features, labels)
    :raises Exception: on iteration, when data_access is unknown or multiple_input != 1
    """
    # Build the batch source for the requested access mode
    if data_access == "fuel":
        batch_source = FuelDataset("train",
                                   tmp_size,
                                   batch_size=batch_size,
                                   bagging=bagging_size,
                                   bagging_iterator=bagging_iterator)
    elif data_access == "in-memory":
        batch_source = InMemoryDataset("train",
                                       source=dataset,
                                       batch_size=batch_size,
                                       source_targets=targets)
    else:
        raise Exception(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s."
            % data_access)

    while True:
        processed_batch, labels = get_next_batch(batch_source, batch_size,
                                                 final_size,
                                                 preprocessing_func,
                                                 preprocessing_args)
        if multiple_input != 1:
            raise Exception("Generator does not work with multiple inputs")

        # One prediction per model, each run with a prediction batch size of 1
        features = [
            model.predict(processed_batch, batch_size=1)
            for model in pretrained_models
        ]
        if mode == "concat":
            features = np.concatenate(features, axis=1)
        yield features, labels
Exemple #6
0
def images_generator(data_access, dataset, targets, batch_size, tmp_size,
                     final_size, bagging_size, bagging_iterator,
                     multiple_input, division, preprocessing_func,
                     preprocessing_args):
    """
    Endless generator of preprocessed training batches, meant for Keras generator-based training.
    Works with InMemoryDataset and FuelDataset. Each iteration yields a processed batch together
    with its targets, so batches can be prepared on the CPU while training runs elsewhere.

    :param data_access: "in-memory" or "fuel"
    :param dataset: passed as `source` to InMemoryDataset (not used when data_access == "fuel")
    :param targets: passed as `source_targets` to InMemoryDataset (not used when data_access == "fuel")
    :param batch_size: batch size given to the dataset and to `get_next_batch`
    :param tmp_size: Used when data_access == "fuel". Datastream will return images of size equal to tmp_size.
    :param final_size: size of images used for the training
    :param bagging_size: passed as `bagging` to FuelDataset
    :param bagging_iterator: passed as `bagging_iterator` to FuelDataset
    :param multiple_input: when different from 1, the batch is yielded as a list repeating it that many times
    :param division: forwarded as `division` to the dataset class
    :param preprocessing_func: function which will be applied to each training batch
    :param preprocessing_args: arguments of the preprocessing function
    :return: yields tuple(batch, targets)
    :raises Exception: on first iteration, when data_access is neither "in-memory" nor "fuel"
    """
    # Build the batch source for the requested access mode
    if data_access == "fuel":
        batch_source = FuelDataset("train",
                                   tmp_size,
                                   batch_size=batch_size,
                                   bagging=bagging_size,
                                   bagging_iterator=bagging_iterator,
                                   division=division)
    elif data_access == "in-memory":
        batch_source = InMemoryDataset("train",
                                       source=dataset,
                                       batch_size=batch_size,
                                       source_targets=targets,
                                       division=division)
    else:
        raise Exception(
            "Data access not available. Must be 'fuel' or 'in-memory'. Here : %s."
            % data_access)

    while True:
        processed_batch, labels = get_next_batch(batch_source, batch_size,
                                                 final_size,
                                                 preprocessing_func,
                                                 preprocessing_args)
        # A multi-input model receives the same batch once per input
        if multiple_input == 1:
            model_input = processed_batch
        else:
            model_input = [processed_batch for _ in range(multiple_input)]
        yield model_input, labels