Example 1
def plot_tnk(mrs):
    # Project the 'tnk' test set into the fitted PCA space, then plot
    # outliers (label == -1) as red triangles and normal points as blue crosses.
    test_bi_pca_all = data_utils.get_test_transformed('tnk', mrs)
    ot = test_bi_pca_all[test_bi_pca_all['label'] == -1]
    plt.scatter(ot[['pca_1']], ot[['pca_2']], linewidth=0, c='red', marker='^')
    noot = test_bi_pca_all[test_bi_pca_all['label'] != -1]
    plt.scatter(noot[['pca_1']],
                noot[['pca_2']],
                linewidth=0,
                c='blue',
                marker='X')
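All of these examples rely on data_utils.get_test_transformed, a project-specific helper rather than a public library call; from the way it is used, it appears to return a pandas DataFrame with pca_1, pca_2 and label columns, where label == -1 marks outliers. A purely hypothetical stand-in, just enough to run the plotting snippets in isolation:

import numpy as np
import pandas as pd

def get_test_transformed(test_dataset, mrs):
    # Hypothetical stand-in for data_utils.get_test_transformed: random 2-D
    # "PCA" coordinates plus a label column where -1 marks outliers.
    # The test_dataset name and the mrs object are accepted but ignored here.
    rng = np.random.default_rng(0)
    n = 100
    return pd.DataFrame({
        'pca_1': rng.normal(size=n),
        'pca_2': rng.normal(size=n),
        'label': rng.choice([-1, 0], size=n, p=[0.1, 0.9]),
    })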
Example 2
def plot_new_points(test_dataset, mrs):
    test_bi_pca_all = data_utils.get_test_transformed(test_dataset, mrs)
    # Plot the transformed test set: outliers in yellow, normal points in blue.
    ot = test_bi_pca_all[test_bi_pca_all['label'] == -1]
    plt.scatter(ot[['pca_1']], ot[['pca_2']], s=50, linewidth=0, c='yellow', alpha=1, label='Test outliers')
    noot = test_bi_pca_all[test_bi_pca_all['label'] != -1]
    plt.scatter(noot[['pca_1']], noot[['pca_2']], s=50, linewidth=0, c='blue', alpha=1, label='Test data points')
    legend = plt.legend(loc='upper left')
    # Indices 2 and 3 assume two earlier scatters (e.g. the training plot)
    # already sit on the same axes, so the test handles come third and fourth.
    legend.legendHandles[2]._sizes = [30]
    legend.legendHandles[3]._sizes = [40]
Example 3
def plot_nih(mrs):
    # Same plot as plot_tnk, but for the 'nih' test set and with legend labels.
    test_bi_pca_all = data_utils.get_test_transformed('nih', mrs)
    ot = test_bi_pca_all[test_bi_pca_all['label'] == -1]
    plt.scatter(ot[['pca_1']],
                ot[['pca_2']],
                linewidth=0,
                c='red',
                marker='^',
                label='testing outliers')
    noot = test_bi_pca_all[test_bi_pca_all['label'] != -1]
    plt.scatter(noot[['pca_1']],
                noot[['pca_2']],
                linewidth=0,
                c='blue',
                marker='X',
                label='testing normal data')
Example 4
def predict_new_points(test_dataset, clusterer, mrs):
    # Transform the test set into PCA space, plot it, then score it with the
    # fitted HDBSCAN clusterer via approximate_predict.
    test_bi_pca_all = data_utils.get_test_transformed(test_dataset, mrs)

    labels = test_bi_pca_all[['label']]
    test_bi_pca = test_bi_pca_all.drop(['label'], axis=1)

    # see what happened
    ot = test_bi_pca_all[test_bi_pca_all['label'] == -1]
    plt.scatter(ot[['pca_1']], ot[['pca_2']], s=50, linewidth=0, c='yellow', alpha=1, label='Test outliers')
    noot = test_bi_pca_all[test_bi_pca_all['label'] != -1]
    plt.scatter(noot[['pca_1']], noot[['pca_2']], s=50, linewidth=0, c='blue', alpha=1, label='Test data points')
    legend = plt.legend(loc='upper left')
    legend.legendHandles[2]._sizes = [30]
    legend.legendHandles[3]._sizes = [40]
    #
    # Assign each test point to a cluster (or -1 for noise/outliers) and
    # collapse every non-outlier label to 0 before scoring the predictions.
    test_labels, strengths = hdbscan.approximate_predict(clusterer, test_bi_pca)
    test_labels[test_labels > -1] = 0
    sensitivity, specificity, accuracy = data_utils.show_performance(labels, test_labels)
    return sensitivity, specificity, accuracy
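The clusterer argument must be an HDBSCAN model fitted with prediction data enabled, otherwise hdbscan.approximate_predict cannot score new points. A minimal sketch of preparing and calling one, where the min_cluster_size value and the bi_df_pca_unique training frame are assumptions for illustration rather than values from the source:

import hdbscan

# Assumed for illustration: bi_df_pca_unique is the PCA-reduced training
# frame (with a 'label' column) used elsewhere in these examples.
train_bi_pca = bi_df_pca_unique.drop(['label'], axis=1)
clusterer = hdbscan.HDBSCAN(min_cluster_size=15, prediction_data=True)
clusterer.fit(train_bi_pca)

sensitivity, specificity, accuracy = predict_new_points('tnk', clusterer, mrs)

The second definition of predict_new_points below, part of the same example, swaps HDBSCAN for scikit-learn's LocalOutlierFactor in novelty mode and marks as an outlier every test point whose score exceeds the cutoff argument.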
def predict_new_points(bi_df_pca_unique, test_dataset, mrs, cutoff):
    # Variant that scores the test set with a LocalOutlierFactor novelty
    # model fitted on the PCA-reduced training frame bi_df_pca_unique.
    test_bi_pca_all = data_utils.get_test_transformed(test_dataset, mrs)
    true_labels = test_bi_pca_all[['label']]
    test_bi_pca = test_bi_pca_all.drop(['label'], axis=1)
    train_bi_pca = bi_df_pca_unique.drop(['label'], axis=1)

    # see what happened
    ot = test_bi_pca_all[test_bi_pca_all['label'] == -1]
    plt.scatter(ot[['pca_1']],
                ot[['pca_2']],
                s=50,
                linewidth=0,
                c='yellow',
                alpha=1,
                label='Test outliers')
    noot = test_bi_pca_all[test_bi_pca_all['label'] != -1]
    plt.scatter(noot[['pca_1']],
                noot[['pca_2']],
                s=50,
                linewidth=0,
                c='blue',
                alpha=1,
                label='Test data points')
    legend = plt.legend(loc='upper left')
    legend.legendHandles[2]._sizes = [30]
    legend.legendHandles[3]._sizes = [40]
    #

    # k_neighbors and outliers_fraction come from the surrounding module
    # (not shown here); novelty=True lets the model score unseen points.
    clf_predict = LocalOutlierFactor(n_neighbors=k_neighbors,
                                     contamination=outliers_fraction,
                                     novelty=True)
    clf_predict.fit(train_bi_pca)
    # Higher score means more anomalous; points above the cutoff become -1.
    outlier_scores = -clf_predict.score_samples(test_bi_pca)
    test_labels = (outlier_scores > cutoff).astype(int) * -1
    sensitivity, specificity, accuracy = data_utils.show_performance(
        true_labels, test_labels)
    return sensitivity, specificity, accuracy
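A sketch of calling the LOF-based variant, where the neighbor count, contamination level, and the percentile rule for choosing cutoff are assumptions for illustration, not values taken from the source:

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

# Assumed module-level settings referenced inside predict_new_points.
k_neighbors = 20
outliers_fraction = 0.05

# Fit LOF on the training frame and derive a cutoff from its training scores,
# so that roughly outliers_fraction of comparable points would be flagged.
lof = LocalOutlierFactor(n_neighbors=k_neighbors,
                         contamination=outliers_fraction,
                         novelty=True)
lof.fit(bi_df_pca_unique.drop(['label'], axis=1))
cutoff = np.percentile(-lof.negative_outlier_factor_,
                       100 * (1 - outliers_fraction))

sensitivity, specificity, accuracy = predict_new_points(
    bi_df_pca_unique, 'nih', mrs, cutoff)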