Ejemplo n.º 1
0
    #loaded_model = load("model_SVC.joblib")
    #SVM.test_svm_classifier(loaded_model, val_data, val_labels)

    loaded_model = load_model("models/best_model_DNN_Adam.h5")
    NN.test_neural_network(loaded_model, val_data, val_labels)


if __name__ == "__main__":
    total_features = 545333  # total unique features
    testing_set_size = 1500  # set site that will be used to create random test set
    malware_ratio = 0.3  # malware ratio in the set size

    print("Creating data-labels...")
    onehot.create_list_of_apps()  # function from set_one_encoding.py

    # initialize sklearn models
    GNB = models.GaussianNaiveBayes()
    MNB = models.MultinomialNaiveBayes()
    CNB = models.ComplementNaiveBayes()
    BNB = models.BernoulliNaiveBayes()
    DT = models.DecisionTree()
    RF = models.RandomForest()
    KNN = models.KNearestNeighbors()
    LR = models.LogRegression()
    SVM = models.SupportVectorMachine()

    val_runs = 8

    #evaluate_models(val_runs)
    evaluate_on_test_set()
Ejemplo n.º 2
0
def train_external_detector():

    train_data, train_labels, test_data, test_labels = create_sets()

    trained_model = tf.keras.models.load_model('best_model_Adam.h5')
    predict_original = trained_model.predict(train_data)
    confusion = confusion_matrix(train_labels,
                                 np.argmax(predict_original, axis=1))
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR_original = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print(confusion)
    print("Original FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Original Accuracy:", accuracy, "- FPR:", FPR, "- FNR:",
          FNR_original)
    average_changes = 0
    amount_malwares = 0
    averageChanges = 0

    # the numpy array will be filled dynamically
    adversarial_data = np.zeros((0, 3880), dtype=float)

    for i in range(len(train_data)):
        if train_labels[i] == 1:

            x = train_data[i:i + 1]
            # print("x: ", x)
            # print(x.shape)
            try:
                adv_x, changes = craft_adversarial_samples(
                    x, 0, trained_model, 1)
                # print(adv_x)
                # append the adversarial data to the numpy array
                adversarial_data = np.concatenate((adversarial_data, adv_x))
                if changes >= 0:
                    average_changes += changes
                    amount_malwares += 1
            except NameError:
                pass
            except ValueError:
                pass

    if amount_malwares > 0:
        averageChanges += (average_changes / float(amount_malwares))

    train_data, train_labels, test_data, test_labels = create_sets()
    predictions = trained_model.predict(train_data)
    confusion = confusion_matrix(train_labels, np.argmax(predictions, axis=1))
    print(confusion)
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print("Adversarial  FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Adversarial Accuracy:", accuracy, "- FPR:", FPR, "- FNR:", FNR)
    print("Misclassification Rate:", FNR - FNR_original)
    print("Distortion:", averageChanges)
    predictions = trained_model.predict(adversarial_data)
    adversarial_labels = np.ones((len(adversarial_data), ), dtype=int)
    confusion = confusion_matrix(adversarial_labels,
                                 np.argmax(predictions, axis=1))
    print(confusion)
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    FNR = FN / float(FN + TP) * 100
    FPR = FP / float(FP + TN) * 100
    accuracy = ((TP + TN) / float(TP + TN + FP + FN)) * 100
    print("Adversarial  FP:", FP, "- FN:", FN, "- TP:", TP, "- TN", TN)
    print("Adversarial Accuracy:", accuracy, "- FPR:", FPR, "- FNR:", FNR)
    print("Misclassification Rate:", FNR - FNR_original)
    print("Distortion:", averageChanges)
    print(changes_dict)
    del predict_original, FNR_original, predictions, confusion, TP, TN, FP, FN, FNR, FPR, accuracy

    # concatenate legit with produced adversarial input
    final_train_data = np.concatenate((train_data, adversarial_data))
    print("final train data shape:", final_train_data.shape)

    train_labels = np.zeros((len(train_labels), ),
                            dtype=int)  # fill with 0 (the original class)
    print("train labels shape:", train_labels.shape)

    adverarial_labels = np.ones(
        (len(adversarial_data), ),
        dtype=int)  # fill with 1 (the adversarial class)
    print("adversarial labels:", adverarial_labels.shape)

    final_train_labels = np.concatenate((train_labels, adverarial_labels))
    print("final labels shape:", final_train_labels.shape)
    print("Unique classes:", np.unique(final_train_labels))

    del train_data, train_labels, adversarial_data, adverarial_labels
    #shuffle the set
    shuffle(final_train_data, final_train_labels, random_state=123)

    # train with the augmented dataset (with adverarial examples belong to class '1')
    model = generate_neural_network(total_features, [200, 200], 0.2, 0.001,
                                    "glorot_uniform", "zeros", "relu", 2)
    train_neural_network(model,
                         epochs=30,
                         batch_size=150,
                         features=final_train_data,
                         labels=final_train_labels,
                         verbose=2,
                         validation=True,
                         val_data=final_train_data,
                         val_labels=final_train_labels,
                         callbacks=True,
                         path=dir_path + "logs/fit/",
                         model_name="external_detector_2")
    GNB = models.GaussianNaiveBayes()
    MNB = models.MultinomialNaiveBayes()
    CNB = models.ComplementNaiveBayes()
    BNB = models.BernoulliNaiveBayes()
    DT = models.DecisionTree()
    RF = models.RandomForest()
    KNN = models.KNearestNeighbors()
    LR = models.LogRegression()
    SVM = models.SupportVectorMachine()

    model = GNB.train_gaussian_naive_bayes_classifier(
        final_train_data, final_train_labels)  # train Naive Bayes
    score_GNB = GNB.evaluate_gaussian_naive_bayes_classifier(
        model, final_train_data, final_train_labels)  # test performance
    print("GNB", score_GNB)

    model = MNB.train_multi_naive_bayes_classifier(final_train_data,
                                                   final_train_labels)
    score_MNB = MNB.evaluate_multi_naive_bayes_classifier(
        model, final_train_data, final_train_labels)
    print("MNB", score_MNB)

    model = CNB.train_complement_naive_bayes_classifier(
        final_train_data, final_train_labels)
    score_CNB = CNB.evaluate_complement_naive_bayes_classifier(
        model, final_train_data, final_train_labels)
    print("CNB", score_CNB)

    model = BNB.train_bernoulli_naive_bayes_classifier(final_train_data,
                                                       final_train_labels)
    score_BNB = BNB.evaluate_bernoulli_naive_bayes_classifier(
        model, test_data, test_labels)
    print("BNB", score_BNB)

    model = DT.train_decision_tree_classifier(
        final_train_data, final_train_labels)  # train Decision Tree Classifier
    score_dt = DT.evaluate_decision_tree_classifier(model, final_train_data,
                                                    final_train_labels)
    print("DT:", score_dt)

    model = LR.train_logistic_regression_classifier(
        final_train_data, final_train_labels)  # train logistic Regression
    score_lr = LR.evaluate_logistic_regression_classifier(
        model, final_train_data, final_train_labels)
    print("LR", score_lr)

    model = KNN.train_knn_classifier(
        final_train_data,
        final_train_labels)  # train k-Nearest Neighbors Classifier
    score_knn = KNN.evaluate_knn_classifier(model, final_train_data,
                                            final_train_labels)
    print("KNN", score_knn)

    model = SVM.train_svm_classifier(
        final_train_data, final_train_labels)  # train Support Vector Machines
    score_svm = SVM.evaluate_svm_classifier(model, final_train_data,
                                            final_train_labels)
    print("SVM", score_svm)

    model = RF.train_random_forest_classifier(
        final_train_data, final_train_labels)  # train Random Forest
    score_rf = RF.evaluate_random_forest_classifier(model, final_train_data,
                                                    final_train_labels)
    print("RF:", score_rf)