def generate_prediction(description, testing_file_content, testing_image_feature_dict, prediction_file_prefix, feature_extension):
    """Generate prediction.

    For every Keras weight file found in the working directory, a model is
    initiated, the weights are loaded, one prediction is computed for each
    row of the testing file and the predictions are handed over to
    ``solution_basic.write_prediction``.

    :param description: the folder name of the working directory
    :type description: string
    :param testing_file_content: the content in the testing file
    :type testing_file_content: numpy array
    :param testing_image_feature_dict: the features of the testing images which is saved in a dict
    :type testing_image_feature_dict: dict
    :param prediction_file_prefix: the prefix of the prediction file
    :type prediction_file_prefix: string
    :param feature_extension: the extension of the feature files
    :type feature_extension: string
    :return: the prediction file will be saved to disk
    :rtype: None
    :raises ValueError: if a model file is found while testing_image_feature_dict is empty
    """

    print("\nGenerating prediction ...")

    working_directory = common.get_working_directory(description)
    model_path_rule = os.path.join(working_directory, "*" + common.KERAS_MODEL_EXTENSION)
    metric_list = METRIC_LIST_DICT[feature_extension]
    for model_path in sorted(glob.glob(model_path_rule)):
        model_name = os.path.basename(os.path.splitext(model_path)[0])
        print("\nWorking on {} ...".format(model_name))

        # Init a keras model with specific weights.
        # dict.values() is a non-indexable view on Python 3, so the first
        # value is fetched through an iterator (works on Python 2 as well).
        sample_feature = next(iter(testing_image_feature_dict.values()), None)
        if sample_feature is None:
            raise ValueError("testing_image_feature_dict is empty.")

        # The sample is paired with itself only to find out the length of
        # the final feature vector, which is passed to init_model.
        final_feature = solution_basic.get_final_feature(sample_feature, sample_feature, metric_list)
        dimension = final_feature.size
        model = keras_related.init_model(dimension)
        model.load_weights(model_path)

        # Add progress bar
        progress_bar = pyprind.ProgBar(testing_file_content.shape[0], monitor=True)

        # Generate prediction
        prediction_list = []
        for _, file_1_name, file_2_name in testing_file_content:
            file_1_feature = testing_image_feature_dict[file_1_name]
            file_2_feature = testing_image_feature_dict[file_2_name]
            final_feature = solution_basic.get_final_feature(file_1_feature, file_2_feature, metric_list)

            # The model is fed one sample at a time, hence the (1, -1) shape
            final_feature = final_feature.reshape(1, -1)
            probability_estimates = model.predict_proba(final_feature, verbose=0)

            # Row 0 is the only sample; column 1 is used as the prediction
            # (presumably the positive-class probability - TODO confirm
            # against the model built by keras_related.init_model).
            prediction = probability_estimates[0, 1]
            prediction_list.append(prediction)

            # Update progress bar
            progress_bar.update()

        # Report tracking information
        print(progress_bar)

        # Write prediction. The file name carries the model name and the
        # current Unix time in whole seconds.
        prediction_file_name = prediction_file_prefix + model_name + "_" + str(int(time.time())) + ".csv"
        solution_basic.write_prediction(testing_file_content, np.array(prediction_list), prediction_file_name)
def perform_training(image_feature_list, image_index_list, description, feature_extension, nb_epoch):
    """Perform training phase.

    The working directory is reset, the data is split into 5 folds with
    ``LabelKFold`` and one Keras model is trained per fold through
    ``keras_related.train_model``. The best score and the epoch at which it
    was achieved are reported for each fold and summarised at the end.

    :param image_feature_list: the features of the images
    :type image_feature_list: list
    :param image_index_list: the indexes of the images
    :type image_index_list: list
    :param description: the folder name of the working directory
    :type description: string
    :param feature_extension: the extension of the feature files
    :type feature_extension: string
    :param nb_epoch: the maximum number of epochs
    :type nb_epoch: int
    :return: the model files will be saved to disk
    :rtype: None
    """

    print("Performing training phase ...")

    # Reset the working directory
    common.reset_working_directory(description)
    working_directory = common.get_working_directory(description)

    # Cross Validation
    fold_num = 5
    best_score_array = np.zeros(fold_num)
    best_score_index_array = np.zeros(fold_num)
    label_kfold = LabelKFold(image_index_list, n_folds=fold_num)

    # Add progress bar
    progress_bar = pyprind.ProgBar(fold_num, monitor=True)

    metric_list = METRIC_LIST_DICT[feature_extension]
    for fold_index, fold_item in enumerate(label_kfold):
        print("\nWorking on the {:d}/{:d} fold ...".format(fold_index + 1, fold_num))

        # Generate final data set. fold_item[0] selects the training part
        # and fold_item[1] the validation part of the current fold.
        # NOTE(review): the fourth argument differs between the two calls
        # (1 vs None); its meaning is defined by
        # solution_basic.convert_to_final_data_set - confirm there.
        X_train, Y_train = solution_basic.convert_to_final_data_set(image_feature_list, image_index_list, fold_item[0], 1, metric_list)
        X_test, Y_test = solution_basic.convert_to_final_data_set(image_feature_list, image_index_list, fold_item[1], None, metric_list)

        # Perform training
        model_name = "Model_{:d}".format(fold_index + 1) + common.KERAS_MODEL_EXTENSION
        model_path = os.path.join(working_directory, model_name)
        best_score_index, best_score = keras_related.train_model(X_train, Y_train, X_test, Y_test, model_path, nb_epoch)
        best_score_array[fold_index] = best_score
        best_score_index_array[fold_index] = best_score_index

        print("For the {:d} fold, the Keras model achieved the score {:.4f} at the {:d} epoch.".format(fold_index + 1, best_score, best_score_index))

        # Update progress bar
        progress_bar.update()

    # Report tracking information
    print(progress_bar)

    # The epoch indexes are stored in a float array, so the maximum is cast
    # with the builtin int: the np.int alias was removed in NumPy 1.24.
    highest_epoch = int(np.max(best_score_index_array))
    print("\nThe best score is {:.4f} and the highest epoch is {:d}.".format(np.max(best_score_array), highest_epoch))
def generate_prediction(description, testing_file_content, testing_image_feature_dict, prediction_file_prefix, feature_extension):
    """Generate prediction.

    Each scikit-learn model stored in the working directory is loaded with
    joblib and used to score every row of the testing file; the scores of
    each model are written through ``solution_basic.write_prediction``.

    :param description: the folder name of the working directory
    :type description: string
    :param testing_file_content: the content in the testing file
    :type testing_file_content: numpy array
    :param testing_image_feature_dict: the features of the testing images which is saved in a dict
    :type testing_image_feature_dict: dict
    :param prediction_file_prefix: the prefix of the prediction file
    :type prediction_file_prefix: string
    :param feature_extension: the extension of the feature files
    :type feature_extension: string
    :return: the prediction file will be saved to disk
    :rtype: None
    """

    print("\nGenerating prediction ...")

    # Collect the stored models in a deterministic (sorted) order
    model_directory = common.get_working_directory(description)
    search_pattern = os.path.join(model_directory, "*" + common.SCIKIT_LEARN_EXTENSION)
    metrics = METRIC_LIST_DICT[feature_extension]
    stored_model_paths = sorted(glob.glob(search_pattern))

    for stored_model_path in stored_model_paths:
        path_without_extension = os.path.splitext(stored_model_path)[0]
        model_name = os.path.basename(path_without_extension)
        print("\nWorking on {} ...".format(model_name))

        # Restore the fitted classifier from disk
        classifier = joblib.load(stored_model_path)

        # One progress step per row of the testing file
        sample_num = testing_file_content.shape[0]
        progress_bar = pyprind.ProgBar(sample_num, monitor=True)

        # Score the pairs one by one
        scores = []
        for _, first_name, second_name in testing_file_content:
            first_feature = testing_image_feature_dict[first_name]
            second_feature = testing_image_feature_dict[second_name]
            pair_feature = solution_basic.get_final_feature(
                first_feature, second_feature, metric_list=None
            ) if False else solution_basic.get_final_feature(
                first_feature, second_feature, metrics)

            # A single sample is passed as a (1, -1) matrix; column 1 of
            # the returned estimates is kept as the prediction.
            single_sample = pair_feature.reshape(1, -1)
            scores.append(classifier.predict_proba(single_sample)[0, 1])

            progress_bar.update()

        # Report tracking information
        print(progress_bar)

        # Write prediction; the file name ends with the model name and
        # the current Unix time in whole seconds.
        prediction_file_name = prediction_file_prefix + model_name + "_" + str(int(time.time())) + ".csv"
        solution_basic.write_prediction(
            testing_file_content, np.array(scores), prediction_file_name)
def perform_training(image_feature_list, image_index_list, description, feature_extension):
    """Perform training phase.

    The working directory is reset, the data is split into 5 folds with
    ``LabelKFold`` and one scikit-learn model is trained per fold through
    ``sklearn_related.train_model``. The score of every fold is printed,
    followed by the best score over all folds.

    :param image_feature_list: the features of the images
    :type image_feature_list: list
    :param image_index_list: the indexes of the images
    :type image_index_list: list
    :param description: the folder name of the working directory
    :type description: string
    :param feature_extension: the extension of the feature files
    :type feature_extension: string
    :return: the model files will be saved to disk
    :rtype: None
    """

    print("Performing training phase ...")

    # Start from a clean working directory
    common.reset_working_directory(description)
    model_directory = common.get_working_directory(description)

    # Cross validation set-up
    fold_num = 5
    fold_scores = np.zeros(fold_num)
    folds = LabelKFold(image_index_list, n_folds=fold_num)

    # One progress step per fold
    progress_bar = pyprind.ProgBar(fold_num, monitor=True)

    metrics = METRIC_LIST_DICT[feature_extension]
    for fold_number, fold_item in enumerate(folds, 1):
        print("\nWorking on the {:d}/{:d} fold ...".format(fold_number, fold_num))

        # Build the data sets of this fold: the first entry of fold_item
        # feeds the training set, the second one the validation set.
        train_selection = fold_item[0]
        X_train, Y_train = solution_basic.convert_to_final_data_set(
            image_feature_list, image_index_list, train_selection, 1, metrics)
        test_selection = fold_item[1]
        X_test, Y_test = solution_basic.convert_to_final_data_set(
            image_feature_list, image_index_list, test_selection, None, metrics)

        # Train the model of this fold and remember its score
        model_file_name = "Model_{:d}".format(fold_number) + common.SCIKIT_LEARN_EXTENSION
        model_path = os.path.join(model_directory, model_file_name)
        fold_score = sklearn_related.train_model(X_train, Y_train, X_test, Y_test, model_path)
        fold_scores[fold_number - 1] = fold_score

        fold_report = "For the {:d} fold, the sklearn model achieved the score {:.4f}.".format(fold_number, fold_score)
        print(fold_report)

        progress_bar.update()

    # Report tracking information
    print(progress_bar)

    overall_best_score = np.max(fold_scores)
    print("\nThe best score is {:.4f}.".format(overall_best_score))