Example #1
import numpy as np
import pandas as pd
from PyNomaly import loop
from sklearn.metrics import roc_auc_score
# get_precn (precision @ rank n) is a project-level helper; a sketch of it
# follows this example


def get_TOS_loop(X, y, k_list, feature_list):
    # convert to a DataFrame; PyNomaly's LocalOutlierProbability is used
    # with pandas input here
    df_X = pd.DataFrame(X)

    result_loop = np.zeros([X.shape[0], len(k_list)])
    roc_loop = []
    prec_loop = []

    for i in range(len(k_list)):
        k = k_list[i]
        clf = loop.LocalOutlierProbability(df_X, n_neighbors=k).fit()
        score_pred = clf.local_outlier_probabilities.astype(float)

        roc = np.round(roc_auc_score(y, score_pred), decimals=4)
        # apc = np.round(average_precision_score(y, score_pred), decimals=4)
        prec_n = np.round(get_precn(y, score_pred), decimals=4)

        print('LoOP @ {k} - ROC: {roc} Precision@n: {pren}'.format(
            k=k, roc=roc, pren=prec_n))

        feature_list.append('loop_' + str(k))
        roc_loop.append(roc)
        prec_loop.append(prec_n)
        result_loop[:, i] = score_pred
    print()
    return feature_list, roc_loop, prec_loop, result_loop
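
Every snippet in this listing scores its predictions with get_precn, a project-level helper that is not shown here. A minimal sketch of what such a precision-at-rank-n helper plausibly computes, assuming binary 0/1 ground-truth labels with 1 marking outliers (the project's own version may differ in detail):

import numpy as np

def get_precn(y, y_pred):
    # precision @ n: among the n highest-scored points, the fraction that are
    # true outliers, where n is the number of outliers in y (assumed convention)
    y = np.asarray(y).ravel()
    y_pred = np.asarray(y_pred).ravel()
    n = int(y.sum())
    top_n = np.argsort(y_pred)[-n:]
    return y[top_n].sum() / n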
Example #2
import numpy as np
from sklearn.metrics import roc_auc_score


def get_TOS_knn(X, y, k_list, feature_list):
    # three kNN-distance variants used as outlier scores
    knn_clf = ["knn_mean", "knn_median", "knn_kth"]

    result_knn = np.zeros([X.shape[0], len(k_list) * len(knn_clf)])
    roc_knn = []
    prec_knn = []

    for i in range(len(k_list)):
        k = k_list[i]
        # knn() is a project helper returning the mean, median, and kth
        # nearest-neighbor distances as score vectors (sketch after this example)
        k_mean, k_median, k_k = knn(X, n_neighbors=k)
        knn_result = [k_mean, k_median, k_k]

        for j in range(len(knn_result)):
            score_pred = knn_result[j]
            clf = knn_clf[j]

            roc = np.round(roc_auc_score(y, score_pred), decimals=4)
            # apc = np.round(average_precision_score(y, score_pred), decimals=4)
            prec_n = np.round(get_precn(y, score_pred), decimals=4)
            print('{clf} @ {k} - ROC: {roc} Precision@n: {pren}'.format(
                clf=clf, k=k, roc=roc, pren=prec_n))
            feature_list.append(clf + str(k))
            roc_knn.append(roc)
            prec_knn.append(prec_n)
            result_knn[:, i * len(knn_result) + j] = score_pred

    print()
    return feature_list, roc_knn, prec_knn, result_knn
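
Example #2 relies on a project helper knn() that returns three score vectors per value of k. It is not shown in this listing; a small sketch of the assumed semantics (mean, median, and kth nearest-neighbor distance as outlier scores), built on scikit-learn's NearestNeighbors:

import numpy as np
from sklearn.neighbors import NearestNeighbors

def knn(X, n_neighbors):
    # distances to the k nearest neighbors (excluding the point itself),
    # summarized three ways; larger distances suggest outlyingness
    nn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
    dist, _ = nn.kneighbors(X)      # column 0 is the point itself (distance 0)
    dist = dist[:, 1:]
    return dist.mean(axis=1), np.median(dist, axis=1), dist[:, -1]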
Example #3
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_auc_score


def get_TOS_iforest(X, y, n_list, feature_list):
    result_if = np.zeros([X.shape[0], len(n_list)])
    roc_if = []
    prec_if = []

    for i in range(len(n_list)):
        n = n_list[i]
        clf = IsolationForest(n_estimators=n)
        clf.fit(X)
        score_pred = clf.decision_function(X)

        # sklearn's decision_function assigns higher values to inliers,
        # so flip the sign to get "higher score = more anomalous"
        roc = np.round(roc_auc_score(y, score_pred * -1), decimals=4)
        prec_n = np.round(get_precn(y, y_pred=(score_pred * -1)), decimals=4)

        print('Isolation Forest @ {n} - ROC: {roc} Precision@n: {pren}'.format(
            n=n, roc=roc, pren=prec_n))
        feature_list.append('if_' + str(n))
        roc_if.append(roc)
        prec_if.append(prec_n)
        result_if[:, i] = score_pred.reshape(score_pred.shape[0]) * -1
    print()
    return feature_list, roc_if, prec_if, result_if
Example #4
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_auc_score


def get_TOS_svm(X, y, nu_list, feature_list):
    result_ocsvm = np.zeros([X.shape[0], len(nu_list)])
    roc_ocsvm = []
    prec_ocsvm = []

    for i in range(len(nu_list)):
        nu = nu_list[i]
        clf = OneClassSVM(nu=nu)
        clf.fit(X)
        score_pred = clf.decision_function(X)

        roc = np.round(roc_auc_score(y, score_pred * -1), decimals=4)

        # apc = np.round(average_precision_score(y, score_pred * -1), decimals=4)
        prec_n = np.round(get_precn(y, score_pred * -1), decimals=4)
        print('svm @ {nu} - ROC: {roc} Precision@n: {pren}'.format(
            nu=nu, roc=roc, pren=prec_n))
        feature_list.append('ocsvm_' + str(nu))
        roc_ocsvm.append(roc)
        prec_ocsvm.append(prec_n)
        result_ocsvm[:, i] = score_pred.reshape(score_pred.shape[0]) * -1
    print()
    return feature_list, roc_ocsvm, prec_ocsvm, result_ocsvm
Example #5
import numpy as np
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import roc_auc_score


def get_TOS_lof(X, y, k_list, feature_list):
    result_lof = np.zeros([X.shape[0], len(k_list)])
    roc_lof = []
    prec_lof = []

    for i in range(len(k_list)):
        k = k_list[i]
        clf = LocalOutlierFactor(n_neighbors=k)
        clf.fit(X)  # fitting populates negative_outlier_factor_ for the training points
        score_pred = clf.negative_outlier_factor_

        roc = np.round(roc_auc_score(y, score_pred * -1), decimals=4)
        # apc = np.round(average_precision_score(y, score_pred * -1), decimals=4)
        prec_n = np.round(get_precn(y, score_pred * -1), decimals=4)
        print('LOF @ {k} - ROC: {roc} Precision@n: {pren}'.format(k=k,
                                                                  roc=roc,
                                                                  pren=prec_n))

        feature_list.append('lof_' + str(k))
        roc_lof.append(roc)
        prec_lof.append(prec_n)
        result_lof[:, i] = score_pred * -1
    print()
    return feature_list, roc_lof, prec_lof, result_lof
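
One caveat with Example #5: negative_outlier_factor_ only covers the samples the LOF model was fit on. To score unseen data with scikit-learn's LOF you need novelty mode, roughly as follows (illustrative data and variable names, not part of the original snippet):

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

rng = np.random.RandomState(0)
X_train, X_new = rng.randn(200, 5), rng.randn(20, 5)

lof = LocalOutlierFactor(n_neighbors=10, novelty=True)
lof.fit(X_train)                          # novelty mode: fit on training data only
new_scores = -lof.score_samples(X_new)    # flip sign so higher = more anomalous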
Example #6
import numpy as np
from sklearn.metrics import roc_auc_score
# Hbos (histogram-based outlier score) and get_precn are project-level modules


def get_TOS_hbos(X, y, k_list, feature_list):
    # the fixed bin counts below override the passed-in k_list; set them
    # before sizing the result matrix so the column count always matches
    k_list = [3, 5, 7, 9, 12, 15, 20, 25, 30, 50]

    result_hbos = np.zeros([X.shape[0], len(k_list)])
    roc_hbos = []
    prec_hbos = []

    for i in range(len(k_list)):
        k = k_list[i]
        clf = Hbos(bins=k, alpha=0.3)
        clf.fit(X)
        score_pred = clf.decision_scores

        roc = np.round(roc_auc_score(y, score_pred), decimals=4)
        # apc = np.round(average_precision_score(y, score_pred * -1), decimals=4)
        prec_n = np.round(get_precn(y, score_pred), decimals=4)
        print('HBOS @ {k} - ROC: {roc} Precision@n: {pren}'.format(
            k=k, roc=roc, pren=prec_n))

        feature_list.append('hbos_' + str(k))
        roc_hbos.append(roc)
        prec_hbos.append(prec_n)
        result_hbos[:, i] = score_pred
    print()
    return feature_list, roc_hbos, prec_hbos, result_hbos
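
In the surrounding project, the result_* matrices returned by these generators are stacked column-wise onto the original feature matrix, which is what the X_train[:, original_len:] slice in the next example refers to. A minimal illustrative driver, using only the generators above that depend solely on scikit-learn plus the get_precn sketch (the toy data and variable names are assumptions, not the project's actual pipeline):

import numpy as np
from sklearn.datasets import make_classification

# toy data: treat the minority class as the outliers (illustrative only)
X, y = make_classification(n_samples=500, n_features=10, weights=[0.95],
                           random_state=42)

feature_list = []
feature_list, _, _, tos_if = get_TOS_iforest(X, y, [50, 100, 200], feature_list)
feature_list, _, _, tos_lof = get_TOS_lof(X, y, [5, 10, 20], feature_list)

original_len = X.shape[1]                 # boundary between original and TOS columns
X_aug = np.hstack([X, tos_if, tos_lof])   # X_aug[:, original_len:] holds only scores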
Example #7
    # "_o" arrays hold the original features; "_n" arrays hold the newly
    # generated transformed outlier scores (TOS) appended after original_len
    X_train_n = X_train[:, original_len:]
    X_test_n = X_test[:, original_len:]

    for clf, clf_name in zip(clf_list, clf_name_list):
        print('processing', clf_name, 'round', i + 1)
        if clf_name != 'xgb':
            # wrap every non-XGBoost model in imblearn's BalancedBaggingClassifier
            # to handle class imbalance (newer imbalanced-learn releases rename
            # ratio -> sampling_strategy and base_estimator -> estimator)
            clf = BalancedBaggingClassifier(base_estimator=clf,
                                            ratio='auto',
                                            replacement=False)

        # fully supervised: train on the original features only
        clf.fit(X_train_o, y_train.ravel())
        y_pred = clf.predict_proba(X_test_o)

        roc_score = roc_auc_score(y_test, y_pred[:, 1])
        prec_n = get_precn(y_test, y_pred[:, 1])

        result_dict[clf_name + 'ROC' + 'o'].append(roc_score)
        result_dict[clf_name + 'PRC@n' + 'o'].append(prec_n)

        # "unsupervised" setting: train on the TOS (outlier-score) features only
        clf.fit(X_train_n, y_train.ravel())
        y_pred = clf.predict_proba(X_test_n)

        roc_score = roc_auc_score(y_test, y_pred[:, 1])
        prec_n = get_precn(y_test, y_pred[:, 1])

        result_dict[clf_name + 'ROC' + 'n'].append(roc_score)
        result_dict[clf_name + 'PRC@n' + 'n'].append(prec_n)

        # semi-supervised
Example #8
    def evaluate(self, X_test, y_test):
        # score the test set with the model's own scoring routine,
        # then report precision @ rank n
        pred_score = self.sample_scores(X_test)
        prec_n = get_precn(y_test, pred_score)

        print("precision@n", prec_n)