def sgd_run(X, y, splits, loss):
    """Evaluate SGDClassifier hyper-parameter candidates over CV splits.

    For every parameter combination returned by
    sgd_randomized_search(X, y, loss), trains and tests the classifier on
    each (train_index, test_index) pair in `splits`, collects per-split
    metrics, and writes both a text report and an .xls summary sheet.

    Args:
        X: feature matrix, indexable by the integer index arrays in `splits`.
        y: label vector; the positive class for precision/recall/F1 is -1.
        splits: re-iterable sequence of (train_index, test_index) pairs
            (it is traversed once per candidate).
        loss: SGDClassifier loss name, also embedded in the output filenames.

    Side effects:
        Writes outputs/results/test/sdg_<loss>_results.txt (the "sdg" typo
        in the filename is kept so existing consumers of the file keep
        working) and outputs/results/test/SGD_<loss>_test_results.xls.
    """
    book = xlwt.Workbook(encoding="utf-8")
    sheet1 = book.add_sheet("SGD")
    # Header row ("Apha"/"Class weigtht" typos from the old version fixed).
    headers = ["Loss", "Penalty", "Alpha", "L1 ratio", "Learning rate",
               "Warm start", "Class weight", "Precision", "Recall",
               "F1-score", "Geometric mean", "True Positive Rate",
               "False Positive Rate", "False Negative Rate"]
    for col, title in enumerate(headers):
        sheet1.write(0, col, title)

    sgd_params = sgd_randomized_search(X, y, loss)
    res = []
    report_path = os.path.join("outputs", "results", "test",
                               "sdg_" + str(loss) + "_results.txt")
    # Truncate any report left over from a previous run.
    with open(report_path, "w") as f:
        pass

    def _mean_spread(values):
        # Mean and 2 * standard deviation, both rounded. The text report
        # labels the second value "VARIANCE" for historical reasons, but it
        # is actually twice the standard deviation.
        return round(np.mean(values), 2), round(np.var(values) ** 0.5 * 2, 2)

    for result in sgd_params:
        # result[0] is assumed to be the loss; tuned parameters start at [1].
        penalty, alpha, l1_ratio, learning_rate, warm_start, class_weight = \
            result[1:7]
        precision_list, recall_list, f1_list, gmean_list = [], [], [], []
        TPR, FPR, FNR = [], [], []
        for train_index, test_index in splits:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            # Scale using training-fold statistics only (no test leakage).
            scaler = StandardScaler().fit(X_train)
            X_train_scaled = scaler.transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            clf = SGDClassifier(loss=loss, penalty=penalty, alpha=alpha,
                                l1_ratio=l1_ratio,
                                learning_rate=learning_rate,
                                warm_start=warm_start,
                                class_weight=class_weight, random_state=42)
            clf.fit(X_train_scaled, y_train)
            y_pred = clf.predict(X_test_scaled)
            tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
            # TPR = tp/(tp+fn) is the sensitivity (recall); the previous
            # comment incorrectly called it specificity.
            TPR.append(round(float(tp) / (tp + fn), 2))
            FPR.append(round(float(fp) / (fp + tn), 2))
            FNR.append(round(float(fn) / (tp + fn), 2))
            precision_list.append(round(precision_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            recall_list.append(round(recall_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            f1_list.append(round(f1_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            gmean_list.append(round(geometric_mean(y_test, y_pred), 2))
        TPR_mean, TPR_var = _mean_spread(TPR)
        FPR_mean, FPR_var = _mean_spread(FPR)
        FNR_mean, FNR_var = _mean_spread(FNR)
        precision_mean, precision_var = _mean_spread(precision_list)
        recall_mean, recall_var = _mean_spread(recall_list)
        f1_mean, f1_var = _mean_spread(f1_list)
        gmean_mean, gmean_var = _mean_spread(gmean_list)
        res.append([loss, penalty, alpha, l1_ratio, learning_rate, warm_start,
                    class_weight,
                    (precision_mean, precision_var),
                    (recall_mean, recall_var), (f1_mean, f1_var),
                    (gmean_mean, gmean_var), (TPR_mean, TPR_var),
                    (FPR_mean, FPR_var), (FNR_mean, FNR_var)])
        with open(report_path, "a") as f:
            f.write("loss={0}, penalty={1}, alpha={2}, l1 ratio={3}, "
                    "learning rate={4}, warm start={5}, class weight={6}\n"
                    .format(loss, penalty, alpha, l1_ratio, learning_rate,
                            warm_start, str(class_weight)))
            # One uniformly-formatted line per metric; the output text is
            # byte-identical to the previous hand-written f.write calls.
            for label, scores, mean, spread in [
                    ("PRECISION", precision_list, precision_mean,
                     precision_var),
                    ("RECALL", recall_list, recall_mean, recall_var),
                    ("F1", f1_list, f1_mean, f1_var),
                    ("GEOMETRIC MEAN", gmean_list, gmean_mean, gmean_var),
                    ("TRUE POSITIVE RATE", TPR, TPR_mean, TPR_var),
                    ("FALSE POSITIVE RATE", FPR, FPR_mean, FPR_var),
                    ("FALSE NEGATIVE RATE", FNR, FNR_mean, FNR_var)]:
                f.write("{0} SCORES: {1}, {0} MEAN: {2}, {0} VARIANCE: {3}\n"
                        .format(label, scores, mean, spread))
            f.write("----------------------------------------------------------\n")
    for row, entry in enumerate(res):
        for col, value in enumerate(entry):
            # Columns 0-5 hold primitives xlwt can store directly; the
            # class-weight dict and the (mean, spread) tuples are stringified.
            sheet1.write(row + 1, col, value if col < 6 else str(value))
    book.save(os.path.join("outputs", "results", "test",
                           "SGD_" + str(loss) + "_test_results.xls"))
def svm_run(X, y, splits):
    """Evaluate SVC hyper-parameter candidates over the given CV splits.

    Each candidate produced by svm_randomized_search(X, y) is trained and
    tested on every (train_index, test_index) pair in `splits`; per-split
    metrics and their mean/spread are appended to a text report and
    collected into an .xls summary workbook.

    Side effects: writes outputs/results/test/svm_results.txt and
    outputs/results/test/SVM_test_results.xls.
    """
    book = xlwt.Workbook(encoding="utf-8")
    sheet1 = book.add_sheet("SVM")
    column_titles = ("Kernel", "C", "Gamma", "Class weight",
                     "Decision_function_shape", "Precision", "Recall",
                     "F1-score", "Geometric mean", "True Positive Rate",
                     "False Positive Rate", "False Negative Rate")
    for col, title in enumerate(column_titles):
        sheet1.write(0, col, title)

    svm_params = svm_randomized_search(X, y)
    res = []
    report_path = os.path.join("outputs", "results", "test",
                               "svm_results.txt")
    # Start from an empty report file for this run.
    with open(report_path, "w") as f:
        pass

    def _summarize(values):
        # (rounded mean, rounded 2*std); the report calls the second value
        # "VARIANCE" but it is twice the standard deviation.
        return round(np.mean(values), 2), round(np.var(values) ** 0.5 * 2, 2)

    for candidate in svm_params:
        kernel, C, gamma, class_weight, decision_function_shape = \
            candidate[:5]
        precision_list = []
        recall_list = []
        f1_list = []
        gmean_list = []
        TPR = []
        FPR = []
        FNR = []
        for train_index, test_index in splits:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            # Scaling parameters come from the training fold only.
            scaler = StandardScaler().fit(X_train)
            clf = SVC(kernel=kernel, C=C, gamma=gamma,
                      class_weight=class_weight,
                      decision_function_shape=decision_function_shape,
                      random_state=42)
            clf.fit(scaler.transform(X_train), y_train)
            y_pred = clf.predict(scaler.transform(X_test))
            tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
            TPR.append(round(float(tp) / (tp + fn), 2))  # sensitivity
            FPR.append(round(float(fp) / (fp + tn), 2))
            FNR.append(round(float(fn) / (tp + fn), 2))
            precision_list.append(round(precision_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            recall_list.append(round(recall_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            f1_list.append(round(f1_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            gmean_list.append(round(geometric_mean(y_test, y_pred), 2))
        TPR_mean, TPR_var = _summarize(TPR)
        FPR_mean, FPR_var = _summarize(FPR)
        FNR_mean, FNR_var = _summarize(FNR)
        precision_mean, precision_var = _summarize(precision_list)
        recall_mean, recall_var = _summarize(recall_list)
        f1_mean, f1_var = _summarize(f1_list)
        gmean_mean, gmean_var = _summarize(gmean_list)
        res.append([kernel, C, gamma, class_weight, decision_function_shape,
                    (precision_mean, precision_var),
                    (recall_mean, recall_var), (f1_mean, f1_var),
                    (gmean_mean, gmean_var), (TPR_mean, TPR_var),
                    (FPR_mean, FPR_var), (FNR_mean, FNR_var)])
        with open(report_path, "a") as f:
            f.write("kernel={0}, C={1}, gamma={2}, class_weight={3}, "
                    "decision_function_shape={4}\n".format(
                        kernel, C, gamma, str(class_weight),
                        decision_function_shape))
            # Uniform per-metric lines; output bytes match the previous
            # hand-written sequence of f.write calls.
            for label, scores, mean, spread in (
                    ("PRECISION", precision_list, precision_mean,
                     precision_var),
                    ("RECALL", recall_list, recall_mean, recall_var),
                    ("F1", f1_list, f1_mean, f1_var),
                    ("GEOMETRIC MEAN", gmean_list, gmean_mean, gmean_var),
                    ("TRUE POSITIVE RATE", TPR, TPR_mean, TPR_var),
                    ("FALSE POSITIVE RATE", FPR, FPR_mean, FPR_var),
                    ("FALSE NEGATIVE RATE", FNR, FNR_mean, FNR_var)):
                f.write("{0} SCORES: {1}, {0} MEAN: {2}, {0} VARIANCE: {3}\n"
                        .format(label, scores, mean, spread))
            f.write("----------------------------------------------------------\n")
    for row, entry in enumerate(res):
        for col, cell in enumerate(entry):
            # class_weight (col 3) and the metric tuples (cols 5+) must be
            # stringified for xlwt; kernel/C/gamma/shape are stored directly.
            sheet1.write(row + 1, col,
                         cell if col in (0, 1, 2, 4) else str(cell))
    book.save(os.path.join("outputs", "results", "test",
                           "SVM_test_results.xls"))
# NOTE(review): loose script fragment — it relies on names defined outside
# this view (X_train, X_test, y_train, y_test and the running accumulators
# gmean, precision_mal, recall_mal, f1_mal, f_scores), so it presumably sits
# inside a cross-validation loop elsewhere in the file — TODO confirm.
# Standardize features with statistics taken from the training portion only.
std_scale = preprocessing.StandardScaler().fit(X_train)
X_train = std_scale.transform(X_train)
X_test = std_scale.transform(X_test)
# Single entropy-criterion decision tree with fixed depth/leaf constraints.
clf_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                     max_depth=10, min_samples_leaf=5)
clf_entropy.fit(X_train, y_train)
y_pred = clf_entropy.predict(X_test)
# Chained assignment: conf_matrix also holds the flattened counts array.
tn, fp, fn, tp = conf_matrix = confusion_matrix(y_test, y_pred).ravel()
print "(TN, FP, FN, TP) = {}".format((tn, fp, fn, tp))  # Python 2 print
# Accumulate percentage metrics for the class labeled -1 (pos_label=-1).
gmean += geometric_mean(y_test, y_pred) * 100
precision_mal += precision_score(
    y_test, y_pred, average="binary", pos_label=-1) * 100
recall_mal += recall_score(y_test, y_pred, average="binary", pos_label=-1) * 100
f1_mal += f1_score(y_test, y_pred, average='binary', pos_label=-1) * 100
f_scores.append(
    f1_score(y_test, y_pred, average='binary', pos_label=-1) * 100)
# accuracy = accuracy + accuracy_score(y_test,y_pred)*100
# precision_ben = precision_ben + precision_score(y_test, y_pred, average="binary", pos_label=1)*100
# precision_avg = precision_avg + precision_score(y_test, y_pred, average="weighted", labels=[1, -1])*100
# recall_ben = recall_ben + recall_score(y_test, y_pred, average="binary", pos_label=1)*100
# recall_avg = recall_avg + recall_score(y_test, y_pred, average="weighted", labels=[1, -1])*100
# f1_ben = f1_ben + f1_score(y_test, y_pred, average='binary', pos_label=1)*100
# f1_avg = f1_avg + f1_score(y_test, y_pred, average="weighted", labels=[1, -1])*100
def decision_tree_run(X, y, splits):
    """Evaluate DecisionTreeClassifier candidates over the given CV splits.

    Each candidate from decision_tree_randomized_search(X, y) is trained and
    tested on every (train_index, test_index) pair in `splits`; per-split
    metrics and their mean/spread are appended to a text report and
    collected into an .xls summary workbook.

    Side effects: writes outputs/results/test/decision_tree_results.txt and
    outputs/results/test/Decision_tree_test_results.xls.
    """
    book = xlwt.Workbook(encoding="utf-8")
    sheet1 = book.add_sheet("Decision Tree")
    column_titles = ("Criterion", "Min samples split", "Max depth",
                     "Min samples leaf", "Max leaf nodes", "Splitter",
                     "Max features", "Class weight", "Precision", "Recall",
                     "F1-score", "Geometric mean", "True Positive Rate",
                     "False Positive Rate", "False Negative Rate")
    for col, title in enumerate(column_titles):
        sheet1.write(0, col, title)

    decision_tree_params = decision_tree_randomized_search(X, y)
    res = []
    report_path = os.path.join("outputs", "results", "test",
                               "decision_tree_results.txt")
    # Start from an empty report file for this run.
    with open(report_path, "w") as f:
        pass

    def _summarize(values):
        # (rounded mean, rounded 2*std); the report calls the second value
        # "VARIANCE" but it is twice the standard deviation.
        return round(np.mean(values), 2), round(np.var(values) ** 0.5 * 2, 2)

    for candidate in decision_tree_params:
        (criterion, min_samples_split, max_depth, min_samples_leaf,
         max_leaf_nodes, splitter, max_features, class_weight) = candidate[:8]
        precision_list = []
        recall_list = []
        f1_list = []
        gmean_list = []
        TPR = []
        FPR = []
        FNR = []
        for train_index, test_index in splits:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            # No feature scaling here: decision trees are scale-invariant.
            clf = DecisionTreeClassifier(criterion=criterion,
                                         max_depth=max_depth,
                                         max_features=max_features,
                                         max_leaf_nodes=max_leaf_nodes,
                                         min_samples_leaf=min_samples_leaf,
                                         min_samples_split=min_samples_split,
                                         class_weight=class_weight,
                                         splitter=splitter,
                                         random_state=42)
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
            TPR.append(round(float(tp) / (tp + fn), 2))  # sensitivity
            FPR.append(round(float(fp) / (fp + tn), 2))
            FNR.append(round(float(fn) / (tp + fn), 2))
            precision_list.append(round(precision_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            recall_list.append(round(recall_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            f1_list.append(round(f1_score(
                y_test, y_pred, average="binary", pos_label=-1), 2))
            gmean_list.append(round(geometric_mean(y_test, y_pred), 2))
        TPR_mean, TPR_var = _summarize(TPR)
        FPR_mean, FPR_var = _summarize(FPR)
        FNR_mean, FNR_var = _summarize(FNR)
        precision_mean, precision_var = _summarize(precision_list)
        recall_mean, recall_var = _summarize(recall_list)
        f1_mean, f1_var = _summarize(f1_list)
        gmean_mean, gmean_var = _summarize(gmean_list)
        res.append([criterion, min_samples_split, max_depth,
                    min_samples_leaf, max_leaf_nodes, splitter, max_features,
                    class_weight,
                    (precision_mean, precision_var),
                    (recall_mean, recall_var), (f1_mean, f1_var),
                    (gmean_mean, gmean_var), (TPR_mean, TPR_var),
                    (FPR_mean, FPR_var), (FNR_mean, FNR_var)])
        with open(report_path, "a") as f:
            # Spacing inside this format string ("splitter = ",
            # "max_features= ", trailing space) is kept exactly as before.
            f.write("criterion={0}, min_samples_split={1}, max_depth={2}, "
                    "min_samples_leaf={3}, max_leaf_nodes={4}, "
                    "splitter = {5}, max_features= {6}, class_weight={7} \n"
                    .format(criterion, min_samples_split, max_depth,
                            min_samples_leaf, max_leaf_nodes, splitter,
                            max_features, str(class_weight)))
            # Uniform per-metric lines; output bytes match the previous
            # hand-written sequence of f.write calls.
            for label, scores, mean, spread in (
                    ("PRECISION", precision_list, precision_mean,
                     precision_var),
                    ("RECALL", recall_list, recall_mean, recall_var),
                    ("F1", f1_list, f1_mean, f1_var),
                    ("GEOMETRIC MEAN", gmean_list, gmean_mean, gmean_var),
                    ("TRUE POSITIVE RATE", TPR, TPR_mean, TPR_var),
                    ("FALSE POSITIVE RATE", FPR, FPR_mean, FPR_var),
                    ("FALSE NEGATIVE RATE", FNR, FNR_mean, FNR_var)):
                f.write("{0} SCORES: {1}, {0} MEAN: {2}, {0} VARIANCE: {3}\n"
                        .format(label, scores, mean, spread))
            f.write("----------------------------------------------------------\n")
    for row, entry in enumerate(res):
        for col, cell in enumerate(entry):
            # Columns 0-6 hold primitives xlwt can store directly; the
            # class-weight dict and the metric tuples are stringified.
            sheet1.write(row + 1, col, cell if col < 7 else str(cell))
    book.save(os.path.join("outputs", "results", "test",
                           "Decision_tree_test_results.xls"))