def generate_prediction(description, testing_file_content, testing_image_feature_dict, prediction_file_prefix, feature_extension):
    """Generate prediction.

    For every Keras weight file found in the working directory, a model is
    initialised, the weights are loaded and one probability is predicted for
    each row of the testing file. One prediction file is written per model.
    Nothing is done when the working directory holds no weight file.

    :param description: the folder name of the working directory
    :type description: string
    :param testing_file_content: the content in the testing file
    :type testing_file_content: numpy array
    :param testing_image_feature_dict: the features of the testing images which is saved in a dict
    :type testing_image_feature_dict: dict
    :param prediction_file_prefix: the prefix of the prediction file
    :type prediction_file_prefix: string
    :param feature_extension: the extension of the feature files
    :type feature_extension: string
    :return: the prediction file will be saved to disk
    :rtype: None
    :raises ValueError: if weight files exist but testing_image_feature_dict is empty
    """

    print("\nGenerating prediction ...")

    working_directory = common.get_working_directory(description)
    model_path_rule = os.path.join(working_directory, "*" + common.KERAS_MODEL_EXTENSION)
    metric_list = METRIC_LIST_DICT[feature_extension]

    model_path_list = sorted(glob.glob(model_path_rule))
    if not model_path_list:
        return

    if not testing_image_feature_dict:
        raise ValueError("testing_image_feature_dict is empty, the input dimension can not be inferred.")

    # The input dimension only depends on the features and the metric list,
    # so it is inferred once from an arbitrary image paired with itself.
    # next(iter(...)) is used because dict views can not be indexed in Python 3.
    sample_feature = next(iter(testing_image_feature_dict.values()))
    dimension = solution_basic.get_final_feature(sample_feature, sample_feature, metric_list).size

    for model_path in model_path_list:
        model_name = os.path.basename(os.path.splitext(model_path)[0])
        print("\nWorking on {} ...".format(model_name))

        # Init a keras model with specific weights
        model = keras_related.init_model(dimension)
        model.load_weights(model_path)

        # Add progress bar
        progress_bar = pyprind.ProgBar(testing_file_content.shape[0], monitor=True)

        # Generate prediction
        prediction_list = []
        for _, file_1_name, file_2_name in testing_file_content:
            file_1_feature = testing_image_feature_dict[file_1_name]
            file_2_feature = testing_image_feature_dict[file_2_name]
            final_feature = solution_basic.get_final_feature(file_1_feature, file_2_feature, metric_list)
            # The model is fed one sample at a time, hence a single-row batch
            final_feature = final_feature.reshape(1, -1)

            # NOTE(review): predict_proba is not available on models in recent
            # Keras releases - confirm against the Keras version in use.
            probability_estimates = model.predict_proba(final_feature, verbose=0)
            # Keep the value in the second column of the only row
            prediction = probability_estimates[0, 1]
            prediction_list.append(prediction)

            # Update progress bar
            progress_bar.update()

        # Report tracking information
        print(progress_bar)

        # Write prediction, the timestamp keeps file names from different runs apart
        prediction_file_name = prediction_file_prefix + model_name + "_" + str(int(time.time())) + ".csv"
        solution_basic.write_prediction(testing_file_content, np.array(prediction_list), prediction_file_name)
def perform_training(image_feature_list, image_index_list, description, feature_extension, nb_epoch):
    """Perform training phase.

    The working directory is reset, then one Keras model is trained for each
    of the 5 folds produced by LabelKFold on image_index_list. The best score
    and the epoch at which it was reached are reported for every fold.

    :param image_feature_list: the features of the images
    :type image_feature_list: list
    :param image_index_list: the indexes of the images
    :type image_index_list: list
    :param description: the folder name of the working directory
    :type description: string
    :param feature_extension: the extension of the feature files
    :type feature_extension: string
    :param nb_epoch: the maximum number of epochs
    :type nb_epoch: int
    :return: the model files will be saved to disk
    :rtype: None
    """

    print("Performing training phase ...")

    # Reset the working directory
    common.reset_working_directory(description)
    working_directory = common.get_working_directory(description)

    # Cross Validation
    fold_num = 5
    best_score_array = np.zeros(fold_num)
    best_score_index_array = np.zeros(fold_num)
    label_kfold = LabelKFold(image_index_list, n_folds=fold_num)

    # Add progress bar
    progress_bar = pyprind.ProgBar(fold_num, monitor=True)

    metric_list = METRIC_LIST_DICT[feature_extension]
    for fold_index, fold_item in enumerate(label_kfold):
        print("\nWorking on the {:d}/{:d} fold ...".format(fold_index + 1, fold_num))

        # Generate final data set, fold_item holds the (train, test) selections
        # NOTE(review): the meaning of the 1 / None argument is defined by
        # solution_basic.convert_to_final_data_set - confirm there.
        X_train, Y_train = solution_basic.convert_to_final_data_set(image_feature_list, image_index_list, fold_item[0], 1, metric_list)
        X_test, Y_test = solution_basic.convert_to_final_data_set(image_feature_list, image_index_list, fold_item[1], None, metric_list)

        # Perform training
        model_name = "Model_{:d}".format(fold_index + 1) + common.KERAS_MODEL_EXTENSION
        model_path = os.path.join(working_directory, model_name)
        best_score_index, best_score = keras_related.train_model(X_train, Y_train, X_test, Y_test, model_path, nb_epoch)
        best_score_array[fold_index] = best_score
        best_score_index_array[fold_index] = best_score_index

        print("For the {:d} fold, the Keras model achieved the score {:.4f} at the {:d} epoch.".format(fold_index + 1, best_score, best_score_index))

        # Update progress bar
        progress_bar.update()

    # Report tracking information
    print(progress_bar)

    # The epoch indexes live in a float array, so the maximum is converted with
    # the built-in int; the np.int alias no longer exists in current NumPy.
    highest_epoch = int(np.max(best_score_index_array))
    print("\nThe best score is {:.4f} and the highest epoch is {:d}.".format(np.max(best_score_array), highest_epoch))
Beispiel #3
0
def generate_prediction(description, testing_file_content,
                        testing_image_feature_dict, prediction_file_prefix,
                        feature_extension):
    """Predict with every saved scikit-learn model and write the results.

    Each model file found in the working directory is loaded with joblib and
    asked for a probability for every row of the testing file. One prediction
    file is written per model.

    :param description: the folder name of the working directory
    :type description: string
    :param testing_file_content: the content in the testing file
    :type testing_file_content: numpy array
    :param testing_image_feature_dict: the features of the testing images, keyed by file name
    :type testing_image_feature_dict: dict
    :param prediction_file_prefix: the prefix of the prediction file
    :type prediction_file_prefix: string
    :param feature_extension: the extension of the feature files
    :type feature_extension: string
    :return: the prediction file will be saved to disk
    :rtype: None
    """

    print("\nGenerating prediction ...")

    model_directory = common.get_working_directory(description)
    model_pattern = os.path.join(model_directory,
                                 "*" + common.SCIKIT_LEARN_EXTENSION)
    metrics = METRIC_LIST_DICT[feature_extension]

    # Models are handled in lexicographic order of their paths
    saved_model_paths = glob.glob(model_pattern)
    saved_model_paths.sort()

    for saved_model_path in saved_model_paths:
        path_without_extension = os.path.splitext(saved_model_path)[0]
        model_name = os.path.basename(path_without_extension)
        print("\nWorking on {} ...".format(model_name))

        # Restore the fitted classifier from disk
        classifier = joblib.load(saved_model_path)

        # One tick per row of the testing file
        tracker = pyprind.ProgBar(testing_file_content.shape[0],
                                  monitor=True)

        scores = []
        for _, first_name, second_name in testing_file_content:
            first_feature = testing_image_feature_dict[first_name]
            second_feature = testing_image_feature_dict[second_name]

            # Combine the pair and shape it as a single-row batch
            pair_feature = solution_basic.get_final_feature(
                first_feature, second_feature, metrics).reshape(1, -1)

            # Keep the value in the second column of the only row
            scores.append(classifier.predict_proba(pair_feature)[0, 1])

            tracker.update()

        # Report tracking information
        print(tracker)

        # The timestamp is part of the output file name
        timestamp = str(int(time.time()))
        output_file_name = (prediction_file_prefix + model_name + "_" +
                            timestamp + ".csv")
        solution_basic.write_prediction(testing_file_content,
                                        np.array(scores), output_file_name)
Beispiel #4
0
def perform_training(image_feature_list, image_index_list, description,
                     feature_extension):
    """Train one scikit-learn model per cross-validation fold.

    The working directory is reset first. The data is then split into 5 folds
    with LabelKFold on image_index_list, a model is trained and saved for each
    fold, and the score of every fold plus the overall maximum is printed.

    :param image_feature_list: the features of the images
    :type image_feature_list: list
    :param image_index_list: the indexes of the images
    :type image_index_list: list
    :param description: the folder name of the working directory
    :type description: string
    :param feature_extension: the extension of the feature files
    :type feature_extension: string
    :return: the model files will be saved to disk
    :rtype: None
    """

    print("Performing training phase ...")

    # Start from a clean working directory
    common.reset_working_directory(description)
    output_directory = common.get_working_directory(description)

    # Cross validation set-up
    fold_num = 5
    fold_scores = np.zeros(fold_num)
    folds = LabelKFold(image_index_list, n_folds=fold_num)

    # One tick per fold
    tracker = pyprind.ProgBar(fold_num, monitor=True)

    metrics = METRIC_LIST_DICT[feature_extension]
    for fold_number, fold_item in enumerate(folds, start=1):
        print("\nWorking on the {:d}/{:d} fold ...".format(
            fold_number, fold_num))

        # fold_item holds the (train, test) selections of this fold
        train_selection = fold_item[0]
        X_train, Y_train = solution_basic.convert_to_final_data_set(
            image_feature_list, image_index_list, train_selection, 1,
            metrics)
        test_selection = fold_item[1]
        X_test, Y_test = solution_basic.convert_to_final_data_set(
            image_feature_list, image_index_list, test_selection, None,
            metrics)

        # Train the model, it is stored under a fold-specific file name
        model_file_name = ("Model_{:d}".format(fold_number) +
                           common.SCIKIT_LEARN_EXTENSION)
        model_path = os.path.join(output_directory, model_file_name)
        fold_score = sklearn_related.train_model(X_train, Y_train, X_test,
                                                 Y_test, model_path)
        fold_scores[fold_number - 1] = fold_score

        message = "For the {:d} fold, the sklearn model achieved the score {:.4f}."
        print(message.format(fold_number, fold_score))

        tracker.update()

    # Report tracking information
    print(tracker)

    overall_best = np.max(fold_scores)
    print("\nThe best score is {:.4f}.".format(overall_best))