Example #1
def load_dataset_in_memory_and_resize(data_access, set, division, dataset_path, targets_path, tmp_size,
                                      final_size, batch_size):
    if data_access == "in-memory":
        with timer("Loading %s data"%set):
            dataset = InMemoryDataset(set, dataset_path, source_targets=targets_path, division=division)
            draw_data = np.copy(dataset.dataset)
            targets = np.copy(dataset.targets)
            del dataset
    elif data_access == "fuel":
        with timer("Loading %s data"%set):
            dataset = FuelDataset(set, tmp_size, batch_size=batch_size, shuffle=False, division=division)
            draw_data, targets = dataset.return_whole_dataset()
            del dataset
    else:
        raise Exception("Data access not available. Must be 'fuel' or 'in-memory'. Here : %s."%data_access)

    if tmp_size != final_size:
        # Resize images from the loaded set to final_size
        out = np.zeros((draw_data.shape[0], final_size[0], final_size[1], final_size[2]), dtype="float32")
        with timer("Resizing %s images"%set):
            for i in range(draw_data.shape[0]):
                out[i] = resize_pil(draw_data[i], final_size[0:2])
        del draw_data
        return out, targets
    else:
        return draw_data, targets
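A minimal usage sketch (the sizes below are hypothetical placeholders; `InMemoryDataset`, `FuelDataset`, and `timer` come from the surrounding project):

validset, valid_targets = load_dataset_in_memory_and_resize(
    "fuel", "valid", "leaderboard",
    None, None,                # dataset_path/targets_path are unused with "fuel"
    (270, 270, 3),             # tmp_size served by the Fuel datastream
    (224, 224, 3),             # final_size expected by the network
    32)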
Example #2
def features_generator(data_access, dataset, targets, batch_size, tmp_size,
                       final_size, bagging_size, bagging_iterator,
                       multiple_input, preprocessing_func, preprocessing_args,
                       pretrained_model):
    # Instantiate the dataset
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train",
                                        source=dataset,
                                        batch_size=batch_size,
                                        source_targets=targets)
    elif data_access == "fuel":
        train_dataset = FuelDataset("train",
                                    tmp_size,
                                    batch_size=batch_size,
                                    bagging=bagging_size,
                                    bagging_iterator=bagging_iterator)
    else:
        raise Exception(
            "Data access not available. Must be 'fuel' or 'in-memory'. Got: %s."
            % data_access)
    # Generator loop
    while True:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size,
                                                 final_size,
                                                 preprocessing_func,
                                                 preprocessing_args)
        if multiple_input == 1:
            features = pretrained_model.predict(processed_batch)
            yield features, labels
        else:
            features = pretrained_model.predict(
                [processed_batch for i in range(multiple_input)])
            yield features, labels
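A sketch of how this generator might feed a Keras 1.x-style training loop. `my_preprocessing`, `feature_extractor`, `top_model`, and all numbers are hypothetical placeholders; the old `samples_per_epoch`/`nb_epoch` signature of `fit_generator` is assumed to match the era of this codebase:

gen = features_generator("fuel", None, None, batch_size=32,
                         tmp_size=(270, 270, 3), final_size=(224, 224, 3),
                         bagging_size=0.9, bagging_iterator=0,
                         multiple_input=1,
                         preprocessing_func=my_preprocessing,
                         preprocessing_args=[],
                         pretrained_model=feature_extractor)
top_model.fit_generator(gen, samples_per_epoch=20000, nb_epoch=10)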
Example #3
def check_preprocessed_data(data_access,
                            dataset,
                            targets,
                            batch_size,
                            tmp_size,
                            final_size,
                            preprocessing_func,
                            preprocessing_args,
                            n=10):
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train",
                                        source=dataset,
                                        batch_size=batch_size,
                                        source_targets=targets)
    elif data_access == "fuel":
        train_dataset = FuelDataset("test",
                                    tmp_size,
                                    batch_size=batch_size,
                                    division="leaderboard",
                                    shuffle=False)
    else:
        raise Exception(
            "Data access not available. Must be 'fuel' or 'in-memory'. Got: %s."
            % data_access)

    # Compute only one batch
    start = time.time()
    batch, batch_targets = train_dataset.get_batch()
    batch_targets = convert_labels(batch_targets)
    processed_batch = np.zeros(
        (batch.shape[0], final_size[2], final_size[0], final_size[1]),
        dtype="float32")
    for k in range(batch.shape[0]):  # iterate over the actual batch, which may be smaller than batch_size
        processed_batch[k] = preprocessing_func(batch[k],
                                                *preprocessing_args).transpose(
                                                    2, 0, 1)
    end = time.time()

    print "Batch Shape = ", processed_batch.shape, "with dtype =", processed_batch.dtype
    print "Targets Shape =", batch_targets.shape, "with dtype =", batch_targets.dtype
    for i in range(n):
        plt.figure(0)
        plt.gray()
        plt.clf()
        plt.title("(%d,%d)" % (batch_targets[i][0], batch_targets[i][1]))
        if processed_batch.shape[1] == 3:  # channel axis is first after the transpose above
            plt.imshow(processed_batch[i].transpose(1, 2, 0))
        else:
            plt.imshow(processed_batch[i, 0])
        plt.show()
    print "Processing 1 batch took : %.5f" % (end - start)
Example #4
def multi_features_generator(data_access,
                             dataset,
                             targets,
                             batch_size,
                             tmp_size,
                             final_size,
                             bagging_size,
                             bagging_iterator,
                             multiple_input,
                             preprocessing_func,
                             preprocessing_args,
                             pretrained_models,
                             mode="concat"):
    # Instantiate the dataset
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train",
                                        source=dataset,
                                        batch_size=batch_size,
                                        source_targets=targets)
    elif data_access == "fuel":
        train_dataset = FuelDataset("train",
                                    tmp_size,
                                    batch_size=batch_size,
                                    bagging=bagging_size,
                                    bagging_iterator=bagging_iterator)
    else:
        raise Exception(
            "Data access not available. Must be 'fuel' or 'in-memory'. Got: %s."
            % data_access)
    # Generator loop
    while True:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size,
                                                 final_size,
                                                 preprocessing_func,
                                                 preprocessing_args)
        if multiple_input == 1:
            features = []
            for pretrained_model in pretrained_models:
                features.append(
                    pretrained_model.predict(processed_batch, batch_size=1))
            if mode == "concat":
                features = np.concatenate(features, axis=1)
            yield features, labels
        else:
            raise Exception("Generator does not work with multiple inputs")
Example #5
def images_generator(data_access, dataset, targets, batch_size, tmp_size,
                     final_size, bagging_size, bagging_iterator,
                     multiple_input, division, preprocessing_func,
                     preprocessing_args):
    """
    Generator function used when using the keras function 'fit_on_generator'. Can work with InMemoryDataset, FuelDataset.
    Yield a tuple to the training containing a processed batch and
    targets. This can be done on the CPU, in parallel of a GPU training. See 'fit_on_generator' for more details.

    :param data_access: "in-memory" or "fuel"
    :param dataset: path to the dataset numpy file (not used when data_acces = "fuel")
    :param targets: path to the targets numpy file (not used when data_acces = "fuel")
    :param batch_size:
    :param tmp_size: Used when data_access == "fuel". Datastream will return images of size equal to tmp_size.
    :param final_size: size of images used for the training
    :param preprocessing_func: function which will be applied to each training batch
    :param preprocessing_args: arguments of the preprocessing function
    :return: tuple(batch,targets)
    """
    if data_access == "in-memory":
        train_dataset = InMemoryDataset("train",
                                        source=dataset,
                                        batch_size=batch_size,
                                        source_targets=targets,
                                        division=division)
    elif data_access == "fuel":
        train_dataset = FuelDataset("train",
                                    tmp_size,
                                    batch_size=batch_size,
                                    bagging=bagging_size,
                                    bagging_iterator=bagging_iterator,
                                    division=division)
    else:
        raise Exception(
            "Data access not available. Must be 'fuel' or 'in-memory'. Got: %s."
            % data_access)
    while True:
        # Get next batch
        processed_batch, labels = get_next_batch(train_dataset, batch_size,
                                                 final_size,
                                                 preprocessing_func,
                                                 preprocessing_args)
        if multiple_input == 1:
            yield processed_batch, labels
        else:
            yield [processed_batch for i in range(multiple_input)], labels
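When multiple_input > 1, the generator yields the same processed batch repeated in a list, which matches a Keras model with several identical input branches. A hypothetical sketch (`model`, `my_preprocessing`, and the numbers are placeholders):

gen = images_generator("fuel", None, None, 32, (270, 270, 3), (224, 224, 3),
                       0.9, 0, 2, "leaderboard",
                       my_preprocessing, [])
batch_list, labels = next(gen)  # batch_list is [batch, batch] for a two-input model
model.fit_generator(gen, samples_per_epoch=20000, nb_epoch=10)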
Example #6
def multiscale_predict(model,
                       training_params,
                       division="leaderboard",
                       verbose=False):
    initial_input_shape = model.input_shape
    k = 0
    for test_size in training_params.test_sizes:
        if verbose:
            print "\nTesting for size :" + str(test_size)
        # Get the best model
        if test_size[0] != model.input_shape[2] or test_size[
                1] != model.input_shape[3]:
            new_model = adapt_to_new_input(
                model, (test_size[2], test_size[0], test_size[1]),
                initial_input_shape[1:],
                verbose=True)
        else:
            new_model = model
        testset = FuelDataset("test",
                              test_size,
                              batch_size=training_params.test_batch_size,
                              shuffle=False,
                              division=division)
        preds, labels = predict(new_model,
                                testset,
                                training_params,
                                flip=False,
                                verbose=verbose)
        if k == 0:
            final_preds = np.copy(preds)
        else:
            final_preds += preds
        k += 1.0
        # Predictions on the flipped testset
        flipped_preds, labels = predict(new_model,
                                        testset,
                                        training_params,
                                        flip=True,
                                        verbose=verbose)
        final_preds += flipped_preds
        k += 1.0

    # Arithmetic averaging of predictions
    final_preds_arithm = final_preds / k

    return final_preds_arithm, labels
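With S entries in training_params.test_sizes, k ends at 2*S (one normal and one flipped pass per size), so the returned array is the arithmetic mean of 2*S prediction arrays. A hypothetical evaluation sketch, assuming one-hot labels and numpy imported as np:

preds, labels = multiscale_predict(model, training_params, verbose=True)
accuracy = np.mean(np.argmax(preds, axis=1) == np.argmax(labels, axis=1))
print "Multiscale accuracy = %.5f" % accuracy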
Example #7
def get_features_on_exp(position, mode, N, training_params, verbose=False):

    model, path_model = get_best_model_from_exp(training_params.path_out)
    initial_input_shape = model.input_shape
    print "\n" + path_model
    out = []
    for test_size in training_params.test_sizes:
        if verbose:
            s = "\nTesting for size :" + str(test_size)
            print s
        # Get the best model
        if test_size[0] != model.input_shape[2] or test_size[
                1] != model.input_shape[3]:
            new_model = adapt_to_new_input(
                model, (test_size[2], test_size[0], test_size[1]),
                initial_input_shape[1:],
                verbose=True)
        else:
            new_model = model

        dataset = FuelDataset(mode,
                              test_size,
                              batch_size=training_params.test_batch_size,
                              shuffle=False,
                              division=training_params.division)
        preds, labels = get_features(new_model,
                                     dataset,
                                     position,
                                     N,
                                     training_params,
                                     True,
                                     flip=False)
        # Predictions on the flipped testset
        flipped_preds, flipped_labels = get_features(new_model,
                                                     dataset,
                                                     position,
                                                     N,
                                                     training_params,
                                                     True,
                                                     flip=True)
        out.append(preds)
        out.append(flipped_preds)
    return out, labels
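The returned list holds two feature arrays per test size (the normal pass, then the flipped pass). A hedged sketch of stacking them into a single design matrix, assuming each array is 2-D (samples x features); position, mode, and N follow the function's own parameters:

features_list, labels = get_features_on_exp(position, mode, N, training_params)
stacked = np.concatenate(features_list, axis=1)  # 2 * len(test_sizes) feature blocks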
Example #8
def launch_adversarial_training(training_params):
    """
    Load the data, and train a Keras model.

    :param training_params: a TrainingParams object which contains each parameter of the training
    :return:
    """
    if not os.path.exists(training_params.path_out):
        os.mkdir(os.path.abspath(training_params.path_out))

    ###### LOADING VALIDATION DATA #######
    validset, valid_targets = load_dataset_in_memory_and_resize(training_params.data_access, "valid",
                                                                training_params.division,
                                                                training_params.dataset_path,
                                                                training_params.targets_path,
                                                                training_params.final_size,
                                                                training_params.final_size,
                                                                training_params.test_batch_size)
    valid_targets = convert_labels(valid_targets)

    ###### Preprocessing VALIDATION DATA #######
    for mode in training_params.valid_preprocessing:
        validset = preprocess_dataset(validset, training_params, mode)
    # Transpose validset >> (N, channel, X, Y)
    validset = validset.transpose(0, 3, 1, 2)
    # Multiple input ?
    if training_params.multiple_inputs > 1:
        validset = [validset for i in range(training_params.multiple_inputs)]

    ###### MODEL INITIALIZATION #######
    with timer("Model initialization"):
        model = training_params.initialize_model()
    if training_params.pretrained_model is not None:
        with timer("Pretrained Model initialization"):
            pretrained_model = training_params.initialize_pretrained_model()
            training_params.generator_args.append(pretrained_model)
            # preprocessed the validset
            if type(pretrained_model) is list:
                features = []
                for pmodel in pretrained_model:
                    features.append(pmodel.predict(validset))
                validset = np.concatenate(features, axis=1)
            else:
                validset = pretrained_model.predict(validset)

    ###### SAVE PARAMS ######
    s = training_params.print_params()
    # Save command
    with open(training_params.path_out + "/command.txt", "w") as f:
        f.write(" ".join(sys.argv))
        f.write(s)
    # Print architecture
    print_architecture(model, path_out=training_params.path_out + "/architecture.txt")

    ###### TRAINING SET #######

    train_dataset = FuelDataset("train", training_params.tmp_size,
                                batch_size=training_params.batch_size,
                                bagging=training_params.bagging_size,
                                bagging_iterator=training_params.bagging_iterator)

    ###### ADVERSARIAL MAPPING ######

    input_ = model.layers[0].input
    y_ = model.y
    layer_output = model.layers[-1].get_output()
    xent = K.categorical_crossentropy(y_, layer_output)
    loss = xent.mean()
    grads = K.gradients(loss, input_)
    get_grads = K.function([input_, y_], [loss, grads])

    ###### TRAINING LOOP #######
    count = training_params.fine_tuning
    epoch_count = 0

    with timer("Training"):
        while training_params.learning_rate >= training_params.learning_rate_min and epoch_count < training_params.nb_max_epoch:

            if count != 0: # Restart from the best model with a lower LR
                model = training_params.initialize_model()
                model.load_weights(training_params.path_out+"/MEM_%d/best_model.cnn"%(count-1))
                # Recompile get_grads
                input_ = model.layers[0].input
                y_ = model.y
                layer_output = model.layers[-1].get_output()
                xent = K.categorical_crossentropy(y_, layer_output)
                loss = xent.mean()
                grads = K.gradients(loss, input_)
                get_grads = K.function([input_, y_], [loss, grads])

            best = 0.0
            patience = training_params.max_no_best
            losses = []
            adv_losses = []
            accuracies = []
            adv_accuracies = []
            valid_losses = []
            valid_accuracies = []
            epoch_count = 0
            no_best_count = 0
            path = training_params.path_out + "/MEM_%d"%count
            if not os.path.exists(path):
                os.mkdir(path)
            # Log file
            with open(path + "/log.txt", "w") as f:
                f.write("LR = %g\n" % training_params.learning_rate)  # %g keeps small learning rates readable
            # Config file
            with open(path + "/config.netconf", "w") as f:
                f.write(model.to_json())

            while no_best_count < patience and epoch_count < training_params.nb_max_epoch:
                new = True
                loss = 0.0
                adv_loss = 0.0
                accuracy = 0.0
                adv_accuracy = 0.0
                # Trainset Loop
                N = training_params.Ntrain / training_params.batch_size  # number of batches per epoch
                for i in range(N):
                    # Train
                    print "\rEpoch %d : Batch %d over %d"%(epoch_count, i, N),
                    processed_batch, labels = get_next_batch(train_dataset, training_params.batch_size,
                                                             training_params.final_size,
                                                             training_params.preprocessing_func,
                                                             training_params.preprocessing_args)
                    l, acc = model.train_on_batch(processed_batch, labels, accuracy=True)
                    # Update stats
                    if new:
                        loss = l
                        accuracy = acc
                    else:
                        loss = 0.9*loss + 0.1*l
                        accuracy = 0.9*accuracy + 0.1*acc
                    # Get adversarial examples
                    l, grads = get_grads([processed_batch, labels])
                    updates = np.sign(grads)
                    adversarials = processed_batch + updates
                    # Train on adv examples
                    adv_l, adv_acc = model.train_on_batch(adversarials, labels, accuracy=True)
                    # Update stats
                    if new:
                        adv_loss = adv_l
                        adv_accuracy = adv_acc
                        new = False
                    else:
                        adv_loss = 0.9*adv_loss + 0.1*adv_l
                        adv_accuracy = 0.9*adv_accuracy + 0.1*adv_acc
                # Store stats
                losses.append(loss)
                accuracies.append(accuracy)
                adv_losses.append(adv_loss)
                adv_accuracies.append(adv_accuracy)
                # Validset loss and accuracy
                out = model.predict(validset)
                valid_loss = categorical_crossentropy(valid_targets, out)
                # use a distinct name: 'count' already indexes the MEM_%d directories
                n_correct = np.sum(np.argmax(valid_targets, axis=1) == np.argmax(out, axis=1))
                score = float(n_correct) / valid_targets.shape[0]
                valid_losses.append(valid_loss)
                valid_accuracies.append(score)

                # Stop criterion and Save model
                string = "***\nEpoch %d: Loss : %0.5f, Adv loss : %0.5f, Valid loss : %0.5f, " \
                         "Acc : %0.5f, Adv acc : %0.5f, Valid acc : %0.5f"%(epoch_count, losses[-1], adv_losses[-1],
                                                                            valid_losses[-1], accuracies[-1],
                                                                            adv_accuracies[-1], valid_accuracies[-1])
                if score > best:
                    no_best_count = 0
                    save_path = path+"/best_model.cnn"
                    if training_params.verbose > 0:
                        string = string + "\tBEST\n"
                        print string
                        write_log(path+"/log.txt", string)
                    best = score
                    model.save_weights(save_path, overwrite=True)
                else:
                    no_best_count += 1
                    save_path = path+"/last_epoch.cnn"
                    if training_params.verbose > 0:
                        string = string + "\n"
                        print string
                        write_log(path+"/log.txt", string)
                    model.save_weights(save_path, overwrite=True)
                epoch_count += 1

            # Update learning rate
            training_params.learning_rate *= 0.1
            training_params.update_model_args()
            with open(path + "/history.pkl", "wb") as f:  # binary mode for pickle
                pickle.dump(losses, f)
                pickle.dump(adv_losses, f)
                pickle.dump(valid_losses, f)
                pickle.dump(accuracies, f)
                pickle.dump(adv_accuracies, f)
                pickle.dump(valid_accuracies, f)
            count += 1
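The adversarial step in the training loop is the fast gradient sign method (FGSM) of Goodfellow et al. (2014) with the step size fixed at 1: x_adv = x + sign(grad_x L(x, y)). A scaled variant is a one-line change; epsilon below is a hypothetical knob not present in the original code:

epsilon = 0.1                                    # hypothetical perturbation budget
l, grads = get_grads([processed_batch, labels])  # loss and input gradient, as in the loop above
adversarials = processed_batch + epsilon * np.sign(grads)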
Example #9
def test_model_on_exp(training_params, verbose=False, write_txt_file=False):
    model, path_model = get_best_model_from_exp(training_params.path_out)
    initial_input_shape = model.input_shape
    print "\n" + path_model
    k = 0
    lines = []
    for test_size in training_params.test_sizes:
        if verbose:
            s = "\nTesting for size :" + str(test_size)
            print s
            lines.append(s)
        # Get the best model
        if test_size[0] != model.input_shape[2] or test_size[
                1] != model.input_shape[3]:
            new_model = adapt_to_new_input(
                model, (test_size[2], test_size[0], test_size[1]),
                initial_input_shape[1:],
                verbose=True)
        else:
            new_model = model

        testset = FuelDataset("valid",
                              test_size,
                              batch_size=training_params.test_batch_size,
                              shuffle=False,
                              division="leaderboard")
        score, loss, preds, labels = test_model(new_model,
                                                testset,
                                                training_params,
                                                flip=False,
                                                verbose=verbose,
                                                return_preds=True)
        if write_txt_file:
            lines.append(
                "\n\tDraw testset score = %.5f\n\tDraw testset loss = %.5f" %
                (score, loss))
        if k == 0:
            final_preds = np.copy(preds)
        else:
            final_preds += preds
        k += 1.0
        # Predictions on the flipped testset
        flipped_score, flipped_loss, flipped_preds, labels = test_model(
            new_model,
            testset,
            training_params,
            flip=True,
            verbose=verbose,
            return_preds=True)
        if write_txt_file:
            lines.append(
                "\n\tFlipped testset score = %.5f\n\tFlipped testset loss = %.5f"
                % (flipped_score, flipped_loss))
        final_preds += flipped_preds
        k += 1.0

    # Arithmetic averaging of predictions
    final_preds_arithm = final_preds / k
    count = np.sum(
        np.argmax(labels, axis=1) == np.argmax(final_preds_arithm, axis=1))
    final_score_arithm = float(count) / labels.shape[0]
    if verbose:
        s = "\nFinal score (arithm) =%.5f" % final_score_arithm
        print s
        lines.append(s)

    if write_txt_file:
        with open(training_params.path_out + "/testset_score.txt", "w") as f:
            for line in lines:
                f.write(line)

    return final_preds_arithm, final_score_arithm, labels