# NOTE(review): the three appends below look like the tail of an earlier loop
# body whose header lies above this excerpt; re-indent them to match that loop
# when merging this chunk back into the full file.
data[3 * index2].append(mahalanobis[0])
data[3 * index2 + 1].append(mean(mahalanobis[:3]))
data[3 * index2 + 2].append(mean(mahalanobis[:5]))

# Column names applied to the result files, which are read without a header row.
headers = [
    "Distance",
    "file",
    "k",
    "m2",
    "time",
    "precision",
    "reported",
]

# For every remaining file suffix (from the third entry of `last` onward) and
# every outlier setting, collect the best F1 scores of the "mahalanobis" runs.
for index, m in enumerate(last[2:]):
    for index2, (o, to) in enumerate(zip(outliers, true_outliers)):
        filename = constant + o + m
        print(filename)

        # recall = precision * reported / to, where `to` comes from
        # `true_outliers` (presumably the ground-truth outlier count -- TODO
        # confirm); f1 is the harmonic mean of precision and recall.
        # NOTE(review): f1 becomes NaN when precision + recall == 0, and NaN
        # makes the ordering produced by sorted() below unreliable -- confirm
        # the inputs never hit that case.
        df = pd.read_csv(filename, header=None, names=headers).assign(
            recall=lambda t: t["precision"] * t["reported"] / to,
            f1=lambda t: (
                2 * t["precision"] * t["recall"]
                / (t["precision"] + t["recall"])
            ),
        )

        # Keep only the rows whose Distance is "mahalanobis", best F1 first.
        mahalanobis_f1 = df.groupby(df["Distance"]).get_group("mahalanobis")["f1"]
        mahalanobis = sorted(mahalanobis_f1, reverse=True)
        print(mahalanobis)

        # Three consecutive rows of `data` per outlier setting:
        # best F1, mean of the top 3, mean of the top 5.
        row = 3 * index2
        data[row].append(mahalanobis[0])
        data[row + 1].append(mean(mahalanobis[:3]))
        data[row + 2].append(mean(mahalanobis[:5]))

# Assemble the collected scores into a names x methods table, run autorank on
# it with alpha=0.01, and save the resulting plot as an EPS file.
results = pd.DataFrame(data, index=names, columns=methods)
ranks = autorank(results, alpha=0.01)
create_report(ranks)
x = plot_stats(ranks, allow_insignificant=True)
figure = x.get_figure()
figure.savefig(
    "output2/results/mahalanobis_small_insignificant.eps",
    format="eps",
    bbox_inches="tight",
)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from autorank import autorank, create_report, latex_table, plot_stats

# Fixed seed so the synthetic populations are reproducible between runs.
np.random.seed(42)
pd.set_option('display.max_columns', 7)

# Parameters of the synthetic populations: one population per entry in
# `means`, all sharing the same standard deviation and sample count.
std = 0.3
means = [0.2, 0.3, 0.5, 0.8, 0.85, 0.9]
sample_size = 50

# One column per population ("pop_0", "pop_1", ...), each holding normally
# distributed samples clipped to the interval [0, 1]. The columns are drawn
# in the order of `means`, so the random stream is consumed deterministically.
data = pd.DataFrame(
    {
        'pop_%i' % position: np.random.normal(mu, std, sample_size).clip(0, 1)
        for position, mu in enumerate(means)
    }
)

# Run autorank on the populations with alpha=0.05, then pass the result to
# autorank's report, plot and LaTeX-table helpers.
res = autorank(data, alpha=0.05, verbose=False)
print(res)
create_report(res)
plot_stats(res)
plt.show()
latex_table(res)