Exemple #1
0
def main_EXP_G_VULN_GEO_LOCATION_evaluate_ANN_remapping():
    """Evaluate the saved ANN classifiers on the test sets and store the results.

    For every (training size, validation size, test size) triple taken in
    lockstep from ``TRAINING_SET_SIZE``, ``VALIDATION_SET_SIZE`` and
    ``TEST_SET_SIZE``, and for every train iteration whose model folder
    exists, the function:

    * loads the stored model and fits a ``MinMaxScaler`` on the first column
      of the matching training set;
    * for each test set in ``[TEST_ITERATIONS_BEG, TEST_ITERATIONS_END)``
      scales column 0 of the test set, takes the arg-max of the model output
      as the predicted class and hands the matrix
      ``[test column 0, test column 1, prediction]`` to
      ``g_vuln_computation.compute_g_vuln_with_remapping``;
    * writes each estimate to a per-model text report.

    Finally all estimates, together with the sizes and iteration indices that
    produced them, are pickled as a ``DataFrame`` under
    ``RESULT_FOLDER + MODEL_NAME``.

    Nothing is done when ``'ANN'`` is not listed in ``APPROACHES``.

    Raises:
        SystemExit: if the size lists are inconsistent, or if scaling maps two
            distinct test values onto the same scaled value.
    """
    read_command_line_options()

    gpu_setup.gpu_setup(id_gpu=ID_GPU, memory_percentage=PERC_GPU)

    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit(
            "The set size lists must all contain the same amount of items.")

    # A test size is looked up for every training size; fail before doing any
    # work instead of hitting an IndexError half-way through the run.
    if len(TEST_SET_SIZE) < len(TRAINING_SET_SIZE):
        sys.exit(
            "TEST_SET_SIZE must contain a test size for every training set size."
        )

    # The path is passed positionally: the keyword name of read_pickle's first
    # parameter differs between pandas releases.
    loaded_g_matrix = pn.read_pickle(G_MATRIX_PATH)
    loaded_g_matrix_rows = pn.read_pickle(G_MATRIX_ROWS_PATH)
    loaded_g_matrix_cols = pn.read_pickle(G_MATRIX_COLS_PATH)

    if 'ANN' not in APPROACHES:
        return

    print(
        "\n####################################################################################"
    )
    print(
        "###################################  ANN approach  ##################################"
    )
    print(
        "####################################################################################\n"
    )

    # One entry per evaluated (model, test set) pair, kept in parallel lists.
    ANN_Rf_values = []
    number_of_test_samples = []
    number_of_training_samples = []
    training_iteration = []
    test_iteration = []

    # zip() stops at the shortest list, i.e. at len(TRAINING_SET_SIZE) given
    # the checks above.
    for training_set_size, validation_set_size, test_set_size in zip(
            TRAINING_SET_SIZE, VALIDATION_SET_SIZE, TEST_SET_SIZE):

        for train_iteration in range(TRAIN_ITERATIONS):
            filepath = RESULT_FOLDER + MODEL_NAME + "/" + str(
                training_set_size) + "_training_size_and_" + str(
                    validation_set_size
                ) + "_validation_size_iteration_" + str(train_iteration)

            # No trained model for this configuration: skip it before
            # reading any data from disk.
            if not os.path.exists(filepath):
                continue

            ANN_model = load_model(filepath=filepath + "/classifier_net_model")

            tr_set = pn.read_pickle(
                DATA_FOLDER + str(training_set_size) + "_training_and_" +
                str(validation_set_size) +
                "_validation_store_folder_train_iteration_" +
                str(train_iteration) + "/training_set.pkl")

            # The scaler is fitted on the training data only and then applied
            # unchanged to every test set of this train iteration.
            min_max_scaler = preprocessing.MinMaxScaler()
            min_max_scaler.fit(tr_set[:, 0].reshape(-1, 1))

            print(
                "\n\n\n\n\n\n\n\n#################################  test_size: "
                + str(test_set_size) + " ################################")

            report_path = (filepath + "/ANN_" + str(training_set_size) +
                           "_training_and_" + str(validation_set_size) +
                           "_validation_file_R_estimate_iteration_" +
                           str(train_iteration) + "_" + str(test_set_size) +
                           "_test_set_size_test_iter_up_to_" +
                           str(TEST_ITERATIONS_END) + ".txt")

            # "w" (truncate) is what the former invalid mode "wa" resolved to
            # where it was accepted at all; the with-block guarantees the
            # report is closed even if an evaluation step fails.
            with open(report_path, "w") as ANN_file_Rf_ANN_g_leak:
                for test_iterator in range(TEST_ITERATIONS_BEG,
                                           TEST_ITERATIONS_END):
                    test_banner = (
                        "\n\n\n#################################  test_set_" +
                        str(test_iterator) +
                        " ################################")
                    print(test_banner)
                    ANN_file_Rf_ANN_g_leak.write(test_banner)

                    test_set = pn.read_pickle(DATA_FOLDER +
                                              str(test_set_size) +
                                              "_size_test_sets/test_set_" +
                                              str(test_iterator) + ".pkl")

                    X_test = test_set[:, 0].reshape(-1, 1)
                    X_test_unique = np.unique(test_set[:, 0]).reshape(-1, 1)
                    y_test = test_set[:, 1]

                    X_test_preprocessed = min_max_scaler.transform(X_test)
                    X_test_preprocessed_unique = min_max_scaler.transform(
                        X_test_unique)

                    # Distinct raw values must stay distinct after scaling.
                    if len(X_test_preprocessed_unique) != len(
                            np.unique(X_test_preprocessed_unique)):
                        sys.exit(
                            "The preprocessing created some collision which might affect the computation"
                        )

                    ###################################################  Prediction

                    print("X_test_preprocessed:  " +
                          str(X_test_preprocessed.shape))
                    print("y_test.shape:  " + str(y_test.shape))

                    pred = ANN_model.predict(x=X_test_preprocessed)

                    # Predicted class = index of the largest model output.
                    ANN_prediction_test = np.argmax(pred,
                                                    axis=1).reshape(-1, 1)

                    final_matrix = np.column_stack(
                        (X_test, y_test, ANN_prediction_test))

                    Rf_ANN_g_leak = g_vuln_computation.compute_g_vuln_with_remapping(
                        final_mat=final_matrix,
                        g_mat=loaded_g_matrix,
                        g_mat_rows=loaded_g_matrix_rows,
                        g_mat_cols=loaded_g_matrix_cols)

                    print("\nRf_ANN_g_leak = " + str(Rf_ANN_g_leak))

                    ANN_file_Rf_ANN_g_leak.write(
                        "\nANN_file_Rf_ANN_g_leak = " + str(Rf_ANN_g_leak))
                    ANN_Rf_values.append(Rf_ANN_g_leak)
                    number_of_test_samples.append(test_set_size)
                    number_of_training_samples.append(training_set_size)
                    training_iteration.append(train_iteration)
                    test_iteration.append(test_iterator)

    result_matrix = np.column_stack((
        np.array(ANN_Rf_values, dtype=np.float64),
        np.array(number_of_test_samples, dtype=np.int32),
        np.array(number_of_training_samples, dtype=np.int32),
        np.array(training_iteration, dtype=np.int32),
        np.array(test_iteration, dtype=np.int32),
    ))

    result_df = pn.DataFrame(data=result_matrix,
                             columns=[
                                 "ANN_Rf_values", "number_of_test_samples",
                                 "number_of_training_samples",
                                 "train_iteration", "test_iteration"
                             ])
    result_df.to_pickle(
        RESULT_FOLDER + MODEL_NAME +
        "/ANN_training_and_validation_result_df_train_size_" +
        str(TRAINING_SET_SIZE[0]) + "_up_to_test_iter_" +
        str(TEST_ITERATIONS_END) + ".pkl")
Exemple #2
0
def main_EXP_G_VULN_GEO_LOCATION_get_stats_from_classifiers():
    """Collect loss/accuracy statistics of the saved ANN classifiers.

    For every (training size, validation size, test size) triple and every
    train iteration, the stored model is loaded, a ``MinMaxScaler`` is fitted
    on the first column of the training set, and ``eval_on_dataset`` is run
    on the training set and on each of the ``TEST_ITERATIONS`` test sets.
    The training figures and the mean, sample variance, sample standard
    deviation and standard error of the test figures (all rounded to three
    decimals) are printed and written to
    ``ANN_file_get_stats_from_classifiers.txt`` in the model's result folder.

    Nothing is evaluated when ``'ANN'`` is not listed in ``APPROACHES``.

    Raises:
        SystemExit: if the size lists differ in length or a model folder is
            missing.
    """
    read_command_line_options()
    # NOTE(review): GPU id and memory share are hard-coded here rather than
    # taken from configuration -- confirm this is intended.
    gpu_setup.gpu_setup(id_gpu="3", memory_percentage=0.5)

    if len(TEST_SET_SIZE) != len(TRAINING_SET_SIZE) or len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit("The set size lists must all contain the same amount of items.")

    result_folder = EXP_G_VULN_GEO_LOCATION_FOLDER + "RESULT_FOLDER_REMAPPING/" + MODEL_NAME + "/"

    if 'ANN' not in APPROACHES:
        return

    print("\n####################################################################################")
    print("###################################  ANN approach  ##################################")
    print("####################################################################################\n")

    # "w" (truncate) is what the former invalid mode "wa" resolved to where it
    # was accepted at all; the with-block closes the file on every exit path,
    # including sys.exit() below.
    with open(result_folder + "ANN_file_get_stats_from_classifiers.txt", "w") as stats_file:

        # The three lists have equal length (checked above).
        for training_set_size, validation_set_size, test_set_size in zip(
                TRAINING_SET_SIZE, VALIDATION_SET_SIZE, TEST_SET_SIZE):

            for train_iteration in range(TRAIN_ITERATIONS):
                filepath = result_folder + str(training_set_size) + "_training_size_and_" + str(
                    validation_set_size) + "_validation_size_iteration_" + str(train_iteration)

                if not os.path.exists(filepath):
                    sys.exit("ERROR: trained model folder not found: " + filepath)

                ANN_model = load_model(filepath=filepath + "/classifier_net_model")

                # Folder holding the training set and the test sets of this
                # configuration.
                store_folder = DATA_FOLDER + str(training_set_size) + "_training_and_" + str(
                    validation_set_size) + "_validation_and_" + str(
                    test_set_size) + "_test_store_folder_train_iteration_" + str(train_iteration) + "/"

                # The path is passed positionally: the keyword name of
                # read_pickle's first parameter differs between pandas releases.
                training_set = pn.read_pickle(store_folder + "training_set.pkl").values

                # Only the fitted scaler is needed further on; it is handed to
                # eval_on_dataset for both the training and the test data.
                min_max_scaler = preprocessing.MinMaxScaler()
                min_max_scaler.fit(training_set[:, 0].reshape(-1, 1))

                stats_file.write(
                    "\n\n\n#################################################################")
                stats_file.write("\n\n\n#################################  training_set_" + str(
                    train_iteration) + " ################################")
                stats_file.write(
                    "\n\n\n#################################################################")

                tr_loss, tr_acc, tr_myacc = eval_on_dataset(data_set=training_set, model=ANN_model,
                                                            min_max_scaler=min_max_scaler)

                print("model --->  " + str(MODEL_NAME))

                print("\nTraining set,  " + str(training_set_size) + " size, iteration  " + str(train_iteration))
                stats_file.write(
                    "\nTraining set, " + str(training_set_size) + " size, iteration " + str(train_iteration))

                print("\ntraining_loss:  " + str(round(tr_loss, 3)))
                stats_file.write("\ntraining_loss: " + str(round(tr_loss, 3)))

                print("\ntraining_accuracy:  " + str(round(tr_acc, 3)))
                stats_file.write("\ntraining_accuracy: " + str(round(tr_acc, 3)) + "\n")

                print("\ntraining_my_accuracy " + str(round(tr_myacc, 3)))
                stats_file.write("\ntraining_my_accuracy: " + str(round(tr_myacc, 3)) + "\n")

                ts_loss_list_for_avg = []
                ts_accuracy_list_for_avg = []
                ts_my_accuracy_list_for_avg = []

                for test_iterator in range(0, TEST_ITERATIONS):
                    print("\n\n\n#################################  test_set_" + str(
                        test_iterator) + " ################################")

                    # NOTE(review): unlike the training set, this file name
                    # carries no ".pkl" suffix -- confirm against the data
                    # layout on disk.
                    test_set = pn.read_pickle(
                        store_folder + str(test_set_size) + "_size_test_sets/test_set_" + str(
                            test_iterator)).values

                    ts_loss, ts_acc, ts_myacc = eval_on_dataset(data_set=test_set, model=ANN_model,
                                                                min_max_scaler=min_max_scaler)

                    ts_loss_list_for_avg.append(ts_loss)
                    ts_accuracy_list_for_avg.append(ts_acc)
                    ts_my_accuracy_list_for_avg.append(ts_myacc)

                _report_get_stats_test_summary("test_loss_avg", ts_loss_list_for_avg, stats_file)
                _report_get_stats_test_summary("ts_accuracy_avg", ts_accuracy_list_for_avg, stats_file)
                _report_get_stats_test_summary("ts_my_accuracy_avg", ts_my_accuracy_list_for_avg, stats_file)


def _report_get_stats_test_summary(label, samples, report_file):
    """Print and log the mean, variance, std and standard error of ``samples``.

    Each figure is rounded to three decimals and emitted as
    ``<label><suffix>: <value>``, once on stdout and once (newline-terminated)
    in ``report_file``. Variance, standard deviation and standard error use
    ``ddof=1``.
    """
    values = np.array(samples)
    summary = (
        ("", np.mean(values, axis=0)),
        ("_var", np.var(a=values, ddof=1)),
        ("_standard_deviation", np.std(a=values, ddof=1)),
        ("_standard_error", stats.sem(a=values, ddof=1)),
    )
    for suffix, value in summary:
        line = label + suffix + ": " + str(round(value, 3))
        print(line)
        report_file.write(line + "\n")
Exemple #3
0
def main_BIS_EXP_G_VULN_DP_evaluate_ANN_remapping():
    read_command_line_options()

    gpu_setup.gpu_setup(id_gpu=ID_GPU, memory_percentage=PERC_GPU)

    if len(TEST_SET_SIZE) != len(TRAINING_SET_SIZE) or len(
            VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit(
            "The set size lists must all contain the same amount of items.")

    loaded_g_matrix = pn.read_pickle(path=G_MATRIX_PATH)
    loaded_g_matrix_rows = pn.read_pickle(path=G_MATRIX_ROWS_PATH)
    loaded_g_matrix_cols = pn.read_pickle(path=G_MATRIX_COLS_PATH)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  ANN approach  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if 'ANN' in APPROACHES:
        print(
            "\n####################################################################################"
        )
        print(
            "###################################  ANN approach  ##################################"
        )
        print(
            "####################################################################################\n"
        )

        #   store the values of the error estimations via all the possible methods
        ANN_Rf_values = []

        #   number of test samples
        number_of_test_samples = []

        #   number of training samples
        number_of_training_samples = []

        #   iterator over different training sets iterations
        training_iteration = []

        #   iterator over different test sets iterations
        test_iteration = []

        for size_list_iterator in range(len(TRAINING_SET_SIZE)):

            #   select the current values for the sizes (useful to keep track in the names of the
            training_set_size = TRAINING_SET_SIZE[size_list_iterator]
            validation_set_size = VALIDATION_SET_SIZE[size_list_iterator]
            test_set_size = TEST_SET_SIZE[0]

            for train_iteration in range(TRAIN_ITERATIONS):
                filepath = RESULT_FOLDER + MODEL_NAME + "/" + str(
                    training_set_size) + "_training_size_and_" + str(
                        validation_set_size
                    ) + "_validation_size_iteration_" + str(train_iteration)

                if not os.path.exists(filepath):
                    continue

                else:
                    ANN_model = load_model(filepath=filepath +
                                           "/classifier_net_model")
                    # print ANN_model.summary()

                training_set = pn.read_pickle(
                    DATA_FOLDER + str(training_set_size) + "_training_and_" +
                    str(validation_set_size) +
                    "_validation_store_folder_train_iteration_" +
                    str(train_iteration) + "/training_set.pkl")

                min_tr = np.min(training_set[:, 0:training_set.shape[1] - 2])
                max_tr = np.max(training_set[:, 0:training_set.shape[1] - 2])

                print "\n\n\n\n\n\n\n\n#################################  test_size: " + str(
                    test_set_size) + " ################################"
                g_vuln_ANN_file = open(
                    filepath + "/ANN_" + str(training_set_size) +
                    "_training_and_" + str(validation_set_size) +
                    "_validation_file_R_estimate_iteration_" +
                    str(train_iteration) + "_" + str(test_set_size) +
                    "_test_set_size_test_iter_up_to_" +
                    str(TEST_ITERATIONS_END) + ".txt", "wa")

                for test_iterator in range(TEST_ITERATIONS_BEG,
                                           TEST_ITERATIONS_END):
                    print "\n\n\n#################################  test_set_" + str(
                        test_iterator) + " ################################"

                    g_vuln_ANN_file.write(
                        "\n\n\n#################################  test_set_" +
                        str(test_iterator) +
                        " ################################")

                    test_set = pn.read_pickle(path=DATA_FOLDER +
                                              str(test_set_size) +
                                              "_size_test_set/test_set_" +
                                              str(test_iterator) + ".pkl")

                    # X_test = test_set[:, 0:test_set.shape[1] - 2]
                    # y_test = test_set[:, -2]
                    # z_test = test_set[:, -1]

                    X_test = test_set[:, 0:test_set.shape[1] - 1]
                    y_test = test_set[:, -1]

                    dt = np.dtype(
                        (np.void, X_test.dtype.itemsize * X_test.shape[1]))
                    b = np.ascontiguousarray(X_test).view(dt)
                    X_test_unique, X_test_unique_cnt = np.unique(
                        b, return_counts=True)
                    X_test_unique = X_test_unique.view(X_test.dtype).reshape(
                        -1, X_test.shape[1])

                    print X_test
                    print max_tr, min_tr
                    X_test_preprocessed = preprocess.scaler_zero_one_all_cols_ts(
                        data_tr=training_set[:, 0:training_set.shape[1] - 2],
                        data=X_test,
                        max_=max_tr,
                        min_=min_tr)
                    print X_test_preprocessed

                    X_test_preprocessed_unique = preprocess.scaler_zero_one_all_cols_ts(
                        data_tr=training_set[:, 0:training_set.shape[1] - 2],
                        data=X_test_unique,
                        max_=max_tr,
                        min_=min_tr)

                    # if len(X_test_preprocessed_unique) != len(np.unique(X_test_preprocessed_unique)):
                    #     sys.exit("The preprocessing created some collision which might affect the computation")

                    # print X_test_preprocessed_unique

                    # new_old_obs = {}
                    # for i in range(len(X_test_preprocessed_unique)):
                    #     new_old_obs[X_test_preprocessed_unique[i][0]] = X_test_unique[i][0]
                    # # print new_old_obs

                    ###########################################################  Prediction

                    print "X_test_preprocessed: ", X_test_preprocessed.shape
                    print "y_test.shape: ", y_test.shape

                    ANN_prediction_test = []

                    pred = ANN_model.predict(x=X_test_preprocessed)

                    print pred

                    for row_iter in range(pred.shape[0]):
                        ANN_prediction_test.append(np.argmax(
                            pred[row_iter, :]))

                    ANN_prediction_test = np.array(
                        ANN_prediction_test).reshape(len(ANN_prediction_test),
                                                     1)

                    final_matrix = np.column_stack((X_test, y_test))
                    final_matrix = np.column_stack(
                        (final_matrix, ANN_prediction_test))

                    g_vuln_ANN = g_vuln_computation.compute_g_vuln_with_remapping_multidimesional_inputs(
                        final_mat=final_matrix,
                        g_mat=loaded_g_matrix,
                        g_mat_rows=loaded_g_matrix_rows,
                        g_mat_cols=loaded_g_matrix_cols)

                    print("\ng_vuln_ANN = " + str(g_vuln_ANN))

                    g_vuln_ANN_file.write("\ng_vuln_ANN_file = " +
                                          str(g_vuln_ANN))
                    ANN_Rf_values.append(g_vuln_ANN)
                    number_of_test_samples.append(test_set_size)
                    number_of_training_samples.append(training_set_size)
                    training_iteration.append(train_iteration)
                    test_iteration.append(test_iterator)

                    # ###########################################################  Accuracy computation
                    #
                    # accuracy = round(utilities.compute_accuracy(y_classes=z_test,
                    #                                             y_pred_classes=ANN_prediction_test), 3)
                    #
                    # print "\nAccuracy ---> ", accuracy
                    # g_vuln_ANN_file.write("\nAccuracy --->" + str(accuracy))
                    #
                    # ###########################################################  Accuracy computation (tf fashion)
                    #
                    # accuracy_tf_fashion = round(utilities.compute_accuracy_tf_fashion(y_classes=z_test,
                    #                                                                   y_pred_classes=
                    #                                                                   ANN_prediction_test), 3)
                    #
                    # print "\nAccuracy tf fashion ---> ", accuracy_tf_fashion
                    #
                    # g_vuln_ANN_file.write("\nAccuracy tf fashion ---> " + str(accuracy_tf_fashion))
                    #
                    # ###########################################################  Precision computation
                    #
                    # precision = utilities.compute_precision(y_classes=z_test,
                    #                                         y_pred_classes=ANN_prediction_test)
                    #
                    # print "\nPrecision ---> ", precision
                    #
                    # g_vuln_ANN_file.write("\nPrecision ---> " + str(precision))
                    #
                    # ###########################################################  Recall computation
                    #
                    # recall = utilities.compute_recall(y_classes=z_test,
                    #                                   y_pred_classes=ANN_prediction_test)
                    #
                    # print "\nRecall ---> ", recall
                    #
                    # g_vuln_ANN_file.write("\nRecall ---> " + str(recall))
                    #
                    # ###########################################################  F1_score computation
                    #
                    # F1_score = utilities.compute_f1_score(y_classes=y_test,
                    #                                       y_pred_classes=ANN_prediction_test)
                    #
                    # print "\nF1_score ---> ", F1_score
                    #
                    # g_vuln_ANN_file.write("\nF1_score ---> " + str(F1_score))

                g_vuln_ANN_file.close()

        ANN_Rf_values = np.array(ANN_Rf_values, dtype=np.float64)
        number_of_test_samples = np.array(number_of_test_samples,
                                          dtype=np.int32)
        number_of_training_samples = np.array(number_of_training_samples,
                                              dtype=np.int32)
        training_iteration = np.array(training_iteration, dtype=np.int32)
        test_iteration = np.array(test_iteration, dtype=np.int32)

        result_matrix = np.column_stack(
            (ANN_Rf_values, number_of_test_samples))
        # print result_matrix.shape

        result_matrix = np.column_stack(
            (result_matrix, number_of_training_samples))
        # print result_matrix.shape

        result_matrix = np.column_stack((result_matrix, training_iteration))
        # print result_matrix.shape

        result_matrix = np.column_stack((result_matrix, test_iteration))
        # print result_matrix.shape

        result_df = pn.DataFrame(data=result_matrix,
                                 columns=[
                                     "ANN_Rf_values", "number_of_test_samples",
                                     "number_of_training_samples",
                                     "train_iteration", "test_iteration"
                                 ])
        result_df.to_pickle(
            path=RESULT_FOLDER + MODEL_NAME +
            "/ANN_training_and_validation_result_df_train_size_" +
            str(TRAINING_SET_SIZE[0]) + "_up_to_test_iter_" +
            str(TEST_ITERATIONS_END) + ".pkl")
Exemple #4
0
    def train_classifier_net(self,
                             results_folder,
                             training_set,
                             training_supervision,
                             validation_set,
                             validation_supervision,
                             test_set=None,
                             test_supervision=None):
        """Train the classifier network one epoch at a time, logging after each.

        For ``int(self.epochs)`` iterations the model returned by
        ``self.build_classifier_network()`` is fitted for exactly one epoch.
        After every epoch:

        * loss / categorical accuracy (training and validation) taken from
          the fit history are appended to the corresponding ``self.*_vec``
          lists and written to ``<results_folder>/log_file.txt``;
        * precision, recall and F1 are computed through ``utilities`` on the
          arg-maxed predictions for the training and validation sets and
          logged; the F1 values are appended to ``self.f1_value_training``
          and ``self.f1_value_validation``;
        * every metric vector is re-pickled into ``results_folder`` and the
          model plus its weights are saved there, so an interrupted run
          keeps the results of all completed epochs.

        Args:
            results_folder: path prefix (string) under which the log file,
                the pickled vectors and the saved model are written.
            training_set: inputs handed to ``fit`` and ``predict``.
            training_supervision: training targets; class labels are taken
                with ``np.argmax(..., axis=1)`` (presumably one-hot encoded,
                to be confirmed against the caller).
            validation_set: validation inputs.
            validation_supervision: validation targets, same layout as
                ``training_supervision``.
            test_set: currently unused (test-set evaluation is disabled).
            test_supervision: currently unused (see ``test_set``).

        Returns:
            None.
        """
        # The context manager guarantees that the log is flushed and closed
        # even when fitting, prediction or saving raises part-way through.
        with open(results_folder + "/log_file.txt", "a") as log_file:
            epochs = int(self.epochs)
            batch_size = int(self.batch_size)

            perc_gpu = float(self.perc_gpu)
            gpu_setup.gpu_setup(id_gpu=self.id_gpu,
                                memory_percentage=perc_gpu)

            classifier_net_model = self.build_classifier_network()

            self.results_folder = results_folder

            for epoch in range(epochs):
                print("\n\n\nEpoch " + str(epoch))
                log_file.write("\n\n\nEpoch " + str(epoch))

                # One Keras epoch per outer iteration, so that metrics and
                # checkpoints can be written between epochs.
                history = classifier_net_model.fit(
                    x=training_set,
                    y=training_supervision,
                    batch_size=batch_size,
                    epochs=1,
                    shuffle=True,
                    validation_data=(validation_set,
                                     validation_supervision)).history

                loss_history = history.get('loss')
                self.classifier_network_epochs.append(len(loss_history))
                if len(loss_history) != 1:
                    # inspect.stack()[0][3] is the name of this method; keep
                    # this call here so the reported name stays correct.
                    err_hndl(str_="error_epochs_repartition",
                             add=inspect.stack()[0][3])

                loss = loss_history[0]
                self.classifier_network_loss_vec.append(loss)
                log_file.write("\nClassifier loss ---> " + str(loss))

                categ_acc = history.get('categorical_accuracy')[0]
                self.classifier_network_categ_acc_vec.append(categ_acc)
                log_file.write("\nClassifier categorical accuracy ---> " +
                               str(categ_acc))

                val_loss = history.get('val_loss')[0]
                self.classifier_network_val_loss_vec.append(val_loss)
                log_file.write("\nClassifier validation loss ---> " +
                               str(val_loss))

                val_categ_acc = history.get('val_categorical_accuracy')[0]
                self.classifier_network_val_categ_acc_vec.append(
                    val_categ_acc)
                log_file.write(
                    "\nClassifier validation categorical accuracy ---> " +
                    str(val_categ_acc))

                # NOTE: evaluation on the test set is intentionally disabled;
                # test_set / test_supervision are accepted but not used.

                # Precision / recall / F1 work on class indices, hence the
                # argmax over the class axis of targets and predictions.
                training_set_classes_supervision = np.argmax(
                    training_supervision, axis=1)
                training_set_classes_prediction = np.argmax(
                    classifier_net_model.predict(x=training_set,
                                                 batch_size=batch_size),
                    axis=1)

                validation_set_classes_supervision = np.argmax(
                    validation_supervision, axis=1)
                validation_set_classes_prediction = np.argmax(
                    classifier_net_model.predict(x=validation_set,
                                                 batch_size=batch_size),
                    axis=1)

                splits = (
                    ("training", training_set_classes_supervision,
                     training_set_classes_prediction,
                     self.f1_value_training),
                    ("validation", validation_set_classes_supervision,
                     validation_set_classes_prediction,
                     self.f1_value_validation),
                )
                for split_name, y_true, y_pred, f1_history in splits:
                    precision = utilities.compute_precision(
                        y_classes=y_true, y_pred_classes=y_pred)
                    log_file.write("\nClassifier " + split_name +
                                   "_precision ---> " + str(precision))

                    recall = utilities.compute_recall(
                        y_classes=y_true, y_pred_classes=y_pred)
                    log_file.write("\nClassifier " + split_name +
                                   "_recall ---> " + str(recall))

                    f1_score = utilities.compute_f1_score(
                        y_classes=y_true, y_pred_classes=y_pred)
                    log_file.write("\nClassifier " + split_name +
                                   "_f1 ---> " + str(f1_score))

                    f1_history.append(f1_score)

                # Re-dump every metric vector after each epoch (overwriting
                # the previous dump) so partial runs remain inspectable.
                vectors_to_save = (
                    ('/classifier_network_epochs.pkl',
                     self.classifier_network_epochs),
                    ('/classifier_network_loss_vec.pkl',
                     self.classifier_network_loss_vec),
                    ('/classifier_network_categ_acc_vec.pkl',
                     self.classifier_network_categ_acc_vec),
                    ('/classifier_network_val_loss_vec.pkl',
                     self.classifier_network_val_loss_vec),
                    ('/classifier_network_val_categ_acc_vec.pkl',
                     self.classifier_network_val_categ_acc_vec),
                    ('/f1_value_training_vec.pkl', self.f1_value_training),
                    ('/f1_value_validation_vec.pkl',
                     self.f1_value_validation),
                )
                for file_name, vector in vectors_to_save:
                    with open(results_folder + file_name, 'wb') as f:
                        pickle.dump(vector, f)

                classifier_net_model.save(filepath=results_folder +
                                          "/classifier_net_model")
                classifier_net_model.save_weights(
                    filepath=results_folder + "/classifier_net_model_weights")

        return None
Exemple #5
0
def main_EXP_PSW_evaluate_classifiers_remapping():
    """Evaluate saved ANN classifiers on the test sets and store the results.

    Reads the command line options, configures the GPU and loads the g matrix
    from ``G_MATRIX_PATH``. When ``'ANN'`` is listed in ``APPROACHES``, then
    for every (training size, validation size) pair and every training
    iteration it:

    * loads the saved model from the matching folder under
      ``RESULT_FOLDER + MODEL_NAME`` (exits with "NO DATA" if it is missing);
    * fits a ``MinMaxScaler`` on the first column of the corresponding
      training set;
    * for each test set index in
      ``[TEST_ITERATIONS_BEG, TEST_ITERATIONS_END)`` scales the first column
      of the test set, predicts the classes, and computes the value returned
      by
      ``g_vuln_computation.compute_g_vuln_with_remapping_positional``,
      which is printed and written to a per-iteration text file.

    Finally all the collected values are pickled as a single DataFrame with
    columns ``ANN_Rf_values``, ``number_of_test_samples``,
    ``number_of_training_samples``, ``train_iteration``, ``test_iteration``.

    The process is terminated through ``sys.exit`` when the size lists have
    different lengths, when a model folder is missing, or when the scaling
    maps two distinct observations onto the same value.
    """
    read_command_line_options()

    gpu_setup.gpu_setup(id_gpu=ID_GPU, memory_percentage=PERC_GPU)

    if len(VALIDATION_SET_SIZE) != len(TRAINING_SET_SIZE):
        sys.exit(
            "The set size lists must all contain the same amount of items.")

    loaded_g_matrix = pn.read_pickle(path=G_MATRIX_PATH)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  ANN approach  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if 'ANN' in APPROACHES:
        print(
            "\n####################################################################################"
        )
        print(
            "###################################  ANN approach  ##################################"
        )
        print(
            "####################################################################################\n"
        )

        # One entry per (training iteration, test iteration) evaluation; the
        # five lists are kept aligned and become the result columns.
        ANN_Rf_values = []
        number_of_test_samples = []
        number_of_training_samples = []
        training_iteration = []
        test_iteration = []

        # The two size lists were checked above to have the same length.
        for training_set_size, validation_set_size in zip(
                TRAINING_SET_SIZE, VALIDATION_SET_SIZE):
            test_set_size = TEST_SET_SIZE[0]

            for train_iteration in range(TRAIN_ITERATIONS):
                filepath = (RESULT_FOLDER + MODEL_NAME + "/" +
                            str(training_set_size) + "_training_size_and_" +
                            str(validation_set_size) +
                            "_validation_size_iteration_" +
                            str(train_iteration))

                if not os.path.exists(filepath):
                    print(filepath)
                    sys.exit("NO DATA")

                ANN_model = load_model(filepath=filepath +
                                       "/classifier_net_model")

                # The scaler is fitted on the first column of the training
                # set only, then reused to transform every test set.
                training_set = pn.read_pickle(
                    DATA_FOLDER + str(training_set_size) + "_training_and_" +
                    str(validation_set_size) +
                    "_validation_store_folder_train_iteration_" +
                    str(train_iteration) + "/training_set.pkl")
                min_max_scaler = preprocessing.MinMaxScaler()
                training_set = training_set[:, 0].reshape(-1, 1)
                training_set = min_max_scaler.fit_transform(training_set)

                print(
                    "\n\n\n\n\n\n\n\n#################################  test_size: "
                    + str(test_set_size) +
                    " ################################")

                result_file_path = (
                    filepath + "/ANN_" + str(training_set_size) +
                    "_training_and_" + str(validation_set_size) +
                    "_validation_file_R_estimate_iteration_" +
                    str(train_iteration) + "_" + str(test_set_size) +
                    "_test_set_size_test_iter_up_to_" +
                    str(TEST_ITERATIONS_END) + ".txt")

                # "w" replaces the invalid "wa" mode (same truncating
                # behavior where "wa" was accepted); the context manager
                # closes the file even when sys.exit fires inside the loop.
                with open(result_file_path, "w") as g_vuln_ANN_file:
                    for test_iterator in range(TEST_ITERATIONS_BEG,
                                               TEST_ITERATIONS_END):
                        test_header = (
                            "\n\n\n#################################  test_set_"
                            + str(test_iterator) +
                            " ################################")
                        print(test_header)
                        g_vuln_ANN_file.write(test_header)

                        test_set = pn.read_pickle(
                            path=DATA_FOLDER_TEST + str(test_set_size) +
                            "_size_test_sets/test_set_" +
                            str(test_iterator) + ".pkl")

                        X_test = test_set[:, 0]
                        y_test = test_set[:, 1]

                        X_test_unique = np.unique(X_test)

                        X_test = X_test.reshape(-1, 1)
                        X_test_preprocessed = min_max_scaler.transform(X_test)

                        X_test_unique = X_test_unique.reshape(-1, 1)
                        X_test_preprocessed_unique = min_max_scaler.transform(
                            X_test_unique)

                        # Distinct observations must stay distinct after
                        # scaling, otherwise the remapping is ambiguous.
                        if len(X_test_preprocessed_unique) != len(
                                np.unique(X_test_preprocessed_unique)):
                            sys.exit(
                                "The preprocessing created some collision which might affect the computation"
                            )

                        ###########################################################  Prediction

                        # The doubled space reproduces the output of the
                        # former two-argument print statements.
                        print("X_test_preprocessed:  " +
                              str(X_test_preprocessed.shape))
                        print("y_test.shape:  " + str(y_test.shape))

                        pred = ANN_model.predict(x=X_test_preprocessed)

                        # Predicted class = index of the largest output per
                        # row, kept as a column vector for stacking.
                        ANN_prediction_test = np.argmax(pred,
                                                        axis=1).reshape(-1, 1)

                        # Columns: original observation, true label,
                        # predicted label.
                        final_matrix = np.column_stack(
                            (X_test, y_test, ANN_prediction_test))

                        g_vuln_ANN = g_vuln_computation.compute_g_vuln_with_remapping_positional(
                            final_mat=final_matrix, g_mat=loaded_g_matrix)
                        print("\ng_vuln_ANN = " + str(g_vuln_ANN))

                        g_vuln_ANN_file.write("\ng_vuln_ANN_file = " +
                                              str(g_vuln_ANN))
                        ANN_Rf_values.append(g_vuln_ANN)
                        number_of_test_samples.append(test_set_size)
                        number_of_training_samples.append(training_set_size)
                        training_iteration.append(train_iteration)
                        test_iteration.append(test_iterator)

        result_matrix = np.column_stack((
            np.array(ANN_Rf_values, dtype=np.float64),
            np.array(number_of_test_samples, dtype=np.int32),
            np.array(number_of_training_samples, dtype=np.int32),
            np.array(training_iteration, dtype=np.int32),
            np.array(test_iteration, dtype=np.int32),
        ))

        result_df = pn.DataFrame(data=result_matrix,
                                 columns=[
                                     "ANN_Rf_values", "number_of_test_samples",
                                     "number_of_training_samples",
                                     "train_iteration", "test_iteration"
                                 ])
        result_df.to_pickle(
            path=RESULT_FOLDER + MODEL_NAME +
            "/ANN_training_and_validation_result_df_train_size_" +
            str(TRAINING_SET_SIZE[0]) + "_up_to_test_iter_" +
            str(TEST_ITERATIONS_END) + ".pkl")