Example #1
0
        data[3 * index2].append(mahalanobis[0])
        data[3 * index2 + 1].append(mean(mahalanobis[:3]))
        data[3 * index2 + 2].append(mean(mahalanobis[:5]))

# Column names for the header-less result CSVs; identical for every file, so
# the list is built once up front.
headers = ["Distance", "file", "k", "m2", "time", "precision", "reported"]

# Walk every suffix in last[2:] against every (outlier label, divisor) pair,
# load the matching CSV and record three Mahalanobis F1 summaries per file.
for index, m in enumerate(last[2:]):
    for index2, (o, to) in enumerate(zip(outliers, true_outliers)):
        filename = constant + o + m
        print(filename)
        df = pd.read_csv(filename, header=None, names=headers)

        # recall = precision * reported / to, where `to` is the value paired
        # from true_outliers (presumably the true outlier count -- confirm).
        precision_col = df["precision"]
        df["recall"] = precision_col * df["reported"] / to
        recall_col = df["recall"]
        df["f1"] = 2 * precision_col * recall_col / (precision_col + recall_col)

        # Keep only the "mahalanobis" rows and order their F1 scores from
        # highest to lowest.
        group_by_distance = df.groupby(df["Distance"])
        mahalanobis_df = group_by_distance.get_group("mahalanobis")
        mahalanobis = sorted(mahalanobis_df["f1"], reverse=True)
        print(mahalanobis)

        # Each outlier setting owns three consecutive rows of data:
        # best score, mean of the top three, mean of the top five.
        first_row = 3 * index2
        data[first_row].append(mahalanobis[0])
        data[first_row + 1].append(mean(mahalanobis[:3]))
        data[first_row + 2].append(mean(mahalanobis[:5]))

# Turn the collected scores into a table (rows labelled by names, columns by
# methods) and hand it to autorank with alpha=0.01.
results = pd.DataFrame(data, index=names, columns=methods)
ranks = autorank(results, alpha=0.01)
create_report(ranks)

# plot_stats returns an object exposing get_figure(); the owning figure is
# written out as a tightly cropped EPS file.
x = plot_stats(ranks, allow_insignificant=True)
eps_path = "output2/results/mahalanobis_small_insignificant.eps"
figure = x.get_figure()
figure.savefig(eps_path, format="eps", bbox_inches="tight")
Example #2
0
    # Euclidean: keep only the rows whose m2 equals this iteration's
    # true_outliers entry, then order their F1 scores from best to worst.
    euclidean_df = group_by_distance.get_group("euclidean")
    euclidean_df = euclidean_df[euclidean_df["m2"] == true_outliers[index]]
    euclidean = sorted(list(euclidean_df["f1"]), reverse=True)
    # Rows 3*index, 3*index+1 and 3*index+2 of data receive the best score,
    # the mean of the top two and the mean of the top three respectively.
    data[3 * index].append(euclidean[0])
    data[3 * index + 1].append(mean(euclidean[:2]))
    data[3 * index + 2].append(mean(euclidean[:3]))

    # Minkowski: same filtering and the same three summary values, appended
    # to the same three rows of data.
    minkowski_df = group_by_distance.get_group("minkowski")
    minkowski_df = minkowski_df[minkowski_df["m2"] == true_outliers[index]]
    minkowski = sorted(list(minkowski_df["f1"]), reverse=True)
    data[3 * index].append(minkowski[0])
    data[3 * index + 1].append(mean(minkowski[:2]))
    data[3 * index + 2].append(mean(minkowski[:3]))

    # Mahalanobis: same filtering and the same three summary values, so each
    # of the three rows ends up with one entry per distance, in the order
    # euclidean, minkowski, mahalanobis.
    mahalanobis_df = group_by_distance.get_group("mahalanobis")
    mahalanobis_df = mahalanobis_df[mahalanobis_df["m2"] ==
                                    true_outliers[index]]
    mahalanobis = sorted(list(mahalanobis_df["f1"]), reverse=True)
    data[3 * index].append(mahalanobis[0])
    data[3 * index + 1].append(mean(mahalanobis[:2]))
    data[3 * index + 2].append(mean(mahalanobis[:3]))

import matplotlib.pyplot as plt
# Build the score table (rows labelled by names, columns by metrics) and rank
# its columns with autorank at alpha=0.01.
results = pd.DataFrame(data, index=names, columns=metrics)
ranks = autorank(results, alpha=0.01)
create_report(ranks)

# Draw the ranking plot, tighten the layout of the current pyplot figure,
# then save the figure that owns the returned plot object as EPS.
x = plot_stats(ranks)
plt.tight_layout()
figure = x.get_figure()
figure.savefig("output2/results/topz_distances.eps", format="eps")
Example #3
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from autorank import autorank, create_report, plot_stats, latex_table

# Seed NumPy's global RNG so the synthetic populations are reproducible, and
# let pandas display up to seven columns.
np.random.seed(42)
pd.set_option('display.max_columns', 7)

# Shared standard deviation, one mean per population, samples per population.
std = 0.3
means = [0.2, 0.3, 0.5, 0.8, 0.85, 0.9]
sample_size = 50

# One column per mean, drawn in list order so the random stream is consumed
# in the same sequence; every sample is clipped into [0, 1].
data = pd.DataFrame({
    'pop_%i' % i: np.random.normal(mu, std, sample_size).clip(0, 1)
    for i, mu in enumerate(means)
})

# Compare the generated populations with autorank (alpha=0.05, verbose
# output switched off) and print the returned result object.
res = autorank(data, alpha=0.05, verbose=False)
print(res)
# Pass the same result to each autorank presentation helper in turn.
# plt.show() directly follows plot_stats, so the figure is displayed before
# latex_table runs.
create_report(res)
plot_stats(res)
plt.show()
latex_table(res)

Example #4
0
    '0.5% outliers', '1% outleirs', '5% outliers', '10% outliers', 'nonsense'
]
# Algorithms under comparison and the metrics each one is paired with.
methods = ['Τοp-ζ', 'LOF', 'Probabilistic', 'Distance-Based']
metrics = ['RMSE', 'Eucl', 'Mink', 'Mahal']

# One "<method>-<metric>" label per combination, method-major: all metrics of
# the first method, then all metrics of the second, and so on.
names = ["-".join((method, metric)) for method in methods for metric in metrics]

# Score rows follow the order of names; each line below holds the four metric
# values of one method, in the order of methods.
x05 = [
    0.677, 0.69, 0.84, 1,
    0.762, 0.816, 0.802, 1,
    0.71, 0.73, 0.58, 0.967,
    0.645, 0.557, 0.706, 0.878,
]
x1 = [
    0.656, 0.639, 0.716, 1,
    0.707, 0.706, 0.753, 0.996,
    0.618, 0.658, 0.493, 0.909,
    0.725, 0.676, 0.692, 1,
]
x5 = [
    0.635, 0.623, 0.64, 0.9,
    0.655, 0.666, 0.674, 0.927,
    0.51, 0.524, 0.469, 0.703,
    0.679, 0.709, 0.743, 0.945,
]
x10 = [
    0.644, 0.634, 0.64, 0.822,
    0.716, 0.715, 0.67, 0.907,
    0.374, 0.378, 0.365, 0.574,
    0.668, 0.678, 0.728, 0.928,
]

# Constant filler row: a 1 for each of the sixteen labels.
x_last = [1] * len(names)

data = [x05, x1, x5, x10, x_last]

# One row per entry of data_names, one column per method-metric label.
results = pd.DataFrame(data, index=data_names, columns=names)

# Rank the columns with autorank's default settings, then report and plot.
ranks = autorank(results)
create_report(ranks)
plot_stats(ranks)