def create_better_than_table(benchmark_snapshot_df):
    """Creates table showing whether fuzzer in row is statistically
    significantly better than the fuzzer in the column.

    Args:
        benchmark_snapshot_df: Data frame passed unchanged to
            stat_tests.one_sided_u_test to obtain the pairwise p-values.

    Returns:
        A data frame of 0/1 integers in which 1 marks a p-value below
        stat_tests.SIGNIFICANCE_THRESHOLD. Rows are ordered by descending
        row sum (number of wins); columns use the reverse of that order.
    """
    p_values = stat_tests.one_sided_u_test(benchmark_snapshot_df)

    # Turn "significant" p-values into 1-s. The comparison is vectorized
    # instead of going through the deprecated DataFrame.applymap; missing
    # p-values (NaN) compare as False and therefore become 0.
    better_than = (p_values < stat_tests.SIGNIFICANCE_THRESHOLD).astype(int)

    # Order rows and columns of matrix according to score ranking.
    ranking = better_than.sum(axis=1).sort_values(ascending=False).index
    return better_than.reindex(index=ranking, columns=ranking[::-1])
def benchmark_rank_by_stat_test_wins(benchmark_snapshot_df):
    """Carries out one-tailed statistical tests for each fuzzer pair.

    Args:
        benchmark_snapshot_df: Data frame passed unchanged to
            stat_tests.one_sided_u_test to obtain the pairwise p-values.

    Returns:
        A series named 'stat wins' holding, for each row of the p-value
        table, the number of p-values below
        stat_tests.SIGNIFICANCE_THRESHOLD, sorted in descending order.
    """
    p_values = stat_tests.one_sided_u_test(benchmark_snapshot_df)

    # Turn "significant" p-values into 1-s. The comparison is vectorized
    # instead of going through the deprecated DataFrame.applymap; missing
    # p-values (NaN) compare as False and therefore become 0.
    better_than = (p_values < stat_tests.SIGNIFICANCE_THRESHOLD).astype(int)

    # A row's sum is the number of tests that row's fuzzer won.
    score = better_than.sum(axis=1).sort_values(ascending=False)
    return score.rename('stat wins')