def get_p_values(pkl_list, name_list, cut=sys.maxint, round_=0):
    """Return the p-values computed for the given experiment pickles.

    The pickles are loaded with ``plot_util.load_pickles``, reduced with
    ``plot_util.get_best_dict`` and handed to ``calculate_statistics``.

    :param pkl_list: forwarded to ``plot_util.load_pickles``.
    :param name_list: forwarded to ``plot_util.load_pickles`` and
        ``plot_util.get_best_dict``.
    :param cut: forwarded to ``plot_util.get_best_dict`` as ``cut``
        (presumably an upper bound on the evaluations considered -- verify
        against plot_util).
    :param round_: forwarded to ``calculate_statistics`` as ``round_``.
    :return: whatever ``calculate_statistics`` returns.
    """
    loaded = plot_util.load_pickles(name_list, pkl_list)
    # get_best_dict yields three values; the middle one is not needed here.
    best_values, _unused_idx, optimizer_keys = plot_util.get_best_dict(
        name_list, loaded, cut=cut)
    return calculate_statistics(best_values, optimizer_keys, round_=round_)
def main(pkl_list, name_list, save="", cut=sys.maxint,
         template_string=template_string, experiment_name="Name",
         num_evals="\\#eval"):
    """Render the TeX template for the given experiment pickles.

    :param pkl_list: forwarded to ``plot_util.load_pickles``.
    :param name_list: forwarded to ``plot_util.load_pickles``,
        ``plot_util.get_best_dict`` and ``generate_tex_template``.
    :param save: forwarded to ``generate_tex_template`` as ``save``.
    :param cut: passed positionally to ``plot_util.get_best_dict``.
    :param template_string: forwarded to ``generate_tex_template``; defaults
        to the module-level ``template_string``.
    :param experiment_name: forwarded to ``generate_tex_template``.
    :param num_evals: forwarded to ``generate_tex_template``.
    :return: the result of ``generate_tex_template``.
    """
    loaded = plot_util.load_pickles(name_list, pkl_list)
    # Only the first of the three returned values is used below.
    best_values, _unused_idx, _unused_keys = plot_util.get_best_dict(
        name_list, loaded, cut)

    template_options = dict(
        template_string=template_string,
        save=save,
        num_evals=num_evals,
        experiment_name=experiment_name,
    )
    return generate_tex_template(best_values, name_list, **template_options)
def main(pkl_list, name_list, cut=sys.maxint):
    """Print pairwise t-tests and summary statistics to stdout.

    For every unordered pair of keys returned by ``plot_util.get_best_dict``
    an unpaired t-test (``stats.ttest_ind``) is printed, once on the raw
    values and once on the values rounded to three decimals. When the
    installed scipy is at least 0.11.0, Welch's t-test (``equal_var=False``)
    is printed as well. Afterwards mean/min/max/std are printed for
    ``best_dict`` ("Best Value") and ``idx_dict`` ("Needed Trials").

    :param pkl_list: forwarded to ``plot_util.load_pickles``.
    :param name_list: forwarded to ``plot_util.load_pickles`` and
        ``plot_util.get_best_dict``.
    :param cut: forwarded to ``plot_util.get_best_dict`` as ``cut``.
    :return: None; all output goes to ``sys.stdout``.
    """
    import itertools

    pickles = plot_util.load_pickles(name_list, pkl_list)
    best_dict, idx_dict, keys = plot_util.get_best_dict(name_list, pickles,
                                                        cut=cut)

    def _write_summary(values_by_key):
        # One line per key: mean, min, max and standard deviation.
        for key in keys:
            values = values_by_key[key]
            sys.stdout.write(
                "%10s: %10.5f (min: %10.5f, max: %10.5f, std: %5.3f)\n" %
                (key, float(numpy.mean(values)), float(numpy.min(values)),
                 numpy.max(values), float(numpy.std(values))))

    # Parse the scipy version once. Only the leading digits of the first
    # three components are used, so pre-release/dev versions such as
    # "1.11.0rc1" or "1.12.0.dev0+abc" no longer raise ValueError in int().
    version = []
    for component in scipy.__version__.split(".")[:3]:
        digits = "".join(
            itertools.takewhile(lambda char: char.isdigit(), component))
        if not digits:
            break
        version.append(int(digits))
    version.extend([0] * (3 - len(version)))
    # equal_var=False is only used for scipy >= 0.11.0.
    has_welch = tuple(version) >= (0, 11, 0)

    for k in keys:
        sys.stdout.write("%10s: %s experiment(s)\n" % (k, len(best_dict[k])))

    sys.stdout.write("Unpaired t-tests-----------------------------------------------------\n")
    # combinations() yields each unordered pair once, in the same order as
    # the former nested index loop; it yields nothing for fewer than 2 keys.
    for k, j in itertools.combinations(keys, 2):
        rounded_k = numpy.round(best_dict[k], 3)
        rounded_j = numpy.round(best_dict[j], 3)

        t_true, p_true = stats.ttest_ind(best_dict[k], best_dict[j])
        rounded_t_true, rounded_p_true = stats.ttest_ind(rounded_k, rounded_j)

        sys.stdout.write("%10s vs %10s\n" % (k, j))
        sys.stdout.write("Standard independent 2 sample test, equal population variance\n")
        sys.stdout.write(" "*24 + " T: %10.5e, p-value: %10.5e (%5.3f%%) \n" %
                         (t_true, p_true, p_true*100))
        sys.stdout.write("Rounded: ")
        sys.stdout.write(" T: %10.5e, p-value: %10.5e (%5.3f%%)\n" %
                         (rounded_t_true, rounded_p_true, rounded_p_true*100))

        if has_welch:
            t_false, p_false = stats.ttest_ind(best_dict[k], best_dict[j],
                                               equal_var=False)
            rounded_t_false, rounded_p_false = stats.ttest_ind(
                rounded_k, rounded_j, equal_var=False)
            sys.stdout.write("Welch's t-test, no equal population variance\n")
            sys.stdout.write(" "*24)
            sys.stdout.write(": T: %10.5e, p-value: %10.5e (%5.3f%%)\n" %
                             (t_false, p_false, p_false*100))
            sys.stdout.write("Rounded: ")
            sys.stdout.write(": T: %10.5e, p-value: %10.5e (%5.3f%%)\n" %
                             (rounded_t_false, rounded_p_false,
                              rounded_p_false*100))
        # Blank line separating this pair from the next one.
        sys.stdout.write("\n")

    sys.stdout.write("Best Value-----------------------------------------------------------\n")
    _write_summary(best_dict)

    sys.stdout.write("Needed Trials--------------------------------------------------------\n")
    _write_summary(idx_dict)

    sys.stdout.write("------------------------------------------------------------------------\n")
def get_statistics_as_text(pkl_list, name_list, cut=sys.maxint, round_=0):
    """Build the statistics report as an in-memory text stream.

    The report lists the number of experiments per key, the pairwise
    p-values returned by ``calculate_statistics`` and mean/min/max/std of
    ``best_dict`` ("Best Value") and ``idx_dict`` ("Needed Trials").

    :param pkl_list: forwarded to ``plot_util.load_pickles``.
    :param name_list: forwarded to ``plot_util.load_pickles`` and
        ``plot_util.get_best_dict``.
    :param cut: forwarded to ``plot_util.get_best_dict`` as ``cut``.
    :param round_: forwarded to ``calculate_statistics`` as ``round_``.
    :return: a ``StringIO.StringIO`` holding the report, rewound to
        position 0 so it can be read directly.
    """
    loaded = plot_util.load_pickles(name_list, pkl_list)
    best_dict, idx_dict, keys = plot_util.get_best_dict(name_list, loaded,
                                                        cut=cut)
    p_values = calculate_statistics(best_dict, keys, round_=round_)

    report = StringIO.StringIO()

    def emit_summary(values_by_key):
        # One line per key: mean, min, max and standard deviation.
        for name in keys:
            series = values_by_key[name]
            report.write(
                "%10s: %10.5f (min: %10.5f, max: %10.5f, std: %5.3f)\n" %
                (name, float(numpy.mean(series)), float(numpy.min(series)),
                 numpy.max(series), float(numpy.std(series))))

    report.write(
        "Unpaired t-tests-----------------------------------------------------\n")
    report.write(
        "Standard independent 2 sample test, equal population variance\n")
    for name in keys:
        report.write("%10s: %s experiment(s)\n" % (name, len(best_dict[name])))

    # Pairwise p-values are only reported when there is more than one key.
    if len(keys) > 1:
        for first in p_values:
            for second in p_values[first]:
                p_value = p_values[first][second]
                report.write("%10s vs %10s" % (first, second))
                report.write(" p-value: %10.5e (%5.3f%%) \n" %
                             (p_value, p_value * 100))
    report.write("\n")

    report.write(
        "Best Value-----------------------------------------------------------\n")
    emit_summary(best_dict)

    report.write(
        "Needed Trials--------------------------------------------------------\n")
    emit_summary(idx_dict)

    report.write(
        "------------------------------------------------------------------------\n")
    report.seek(0)
    return report
def main(pkl_list, name_list, save="", cut=sys.maxint,
         template_string=template_string, experiment_name="Name",
         num_evals="\\#eval"):
    """Load the experiment pickles and fill the TeX template with them.

    :param pkl_list: forwarded to ``plot_util.load_pickles``.
    :param name_list: forwarded to ``plot_util.load_pickles``,
        ``plot_util.get_best_dict`` and ``generate_tex_template``.
    :param save: forwarded to ``generate_tex_template`` as ``save``.
    :param cut: passed positionally to ``plot_util.get_best_dict``.
    :param template_string: forwarded to ``generate_tex_template``; defaults
        to the module-level ``template_string``.
    :param experiment_name: forwarded to ``generate_tex_template``.
    :param num_evals: forwarded to ``generate_tex_template``.
    :return: the result of ``generate_tex_template``.
    """
    trial_data = plot_util.load_pickles(name_list, pkl_list)
    reduced = plot_util.get_best_dict(name_list, trial_data, cut)
    # Three values are expected; only the first one feeds the template.
    best_dict, _idx_dict, _keys = reduced
    return generate_tex_template(
        best_dict,
        name_list,
        template_string=template_string,
        save=save,
        num_evals=num_evals,
        experiment_name=experiment_name,
    )
def get_pairwise_wins(pkl_list, name_list, cut=sys.maxint, round_=0):
    """Count, per key, the significant pairwise wins over the other keys.

    A pair counts when its p-value from ``calculate_statistics`` is below
    0.05. The winner of such a pair is the key whose values in ``best_dict``
    have the smaller mean; equal means count for neither side.

    :param pkl_list: forwarded to ``plot_util.load_pickles``.
    :param name_list: forwarded to ``plot_util.load_pickles`` and
        ``plot_util.get_best_dict``.
    :param cut: forwarded to ``plot_util.get_best_dict`` as ``cut``.
    :param round_: forwarded to ``calculate_statistics`` as ``round_``.
    :return: dict mapping a key to a ``defaultdict(int)`` that maps each
        beaten key to the number of wins against it.
    """
    import numpy

    pickles = plot_util.load_pickles(name_list, pkl_list)
    best_dict, idx_dict, keys = plot_util.get_best_dict(name_list, pickles,
                                                        cut=cut)
    p_values = calculate_statistics(best_dict, keys, round_=round_)

    wins_of_optimizer = dict()
    for key in p_values:
        wins_of_optimizer[key] = defaultdict(int)

    # With a single key there is nothing to compare.
    if len(keys) <= 1:
        return wins_of_optimizer

    for key0 in p_values:
        for key1 in p_values[key0]:
            if not p_values[key0][key1] < 0.05:
                continue
            # Compare the means of the two result collections. Comparing
            # the collections themselves with `<` is lexicographic for lists
            # and has an ambiguous truth value for numpy arrays.
            mean0 = numpy.mean(best_dict[key0])
            mean1 = numpy.mean(best_dict[key1])
            if mean0 < mean1:
                wins_of_optimizer[key0][key1] += 1
            elif mean1 < mean0:
                # key1 may not be a top-level key of p_values (this depends
                # on calculate_statistics), so create its counter on demand.
                wins_of_optimizer.setdefault(key1, defaultdict(int))[key0] += 1
    return wins_of_optimizer
def calculate_rankings(trial_list, name_list, bootstrap_samples=500, cut=50):
    """Compute bootstrapped average ranks of the optimizers per step.

    For each of ``bootstrap_samples`` draws, one entry per optimizer is
    picked at random (fixed seed 1) from that optimizer's loaded pickles.
    For every step ``i`` in ``1..cut`` the picked entries are ranked with
    ``scipy.stats.rankdata`` on ``plot_util.get_best(entry, i)`` rounded to
    five decimals, and the ranks are averaged over all draws.

    :param trial_list: forwarded to ``plot_util.load_pickles``.
    :param name_list: sequence whose entries carry the optimizer name at
        index 0; also forwarded to ``plot_util.load_pickles``.
    :param bootstrap_samples: number of random draws; converted with
        ``int()``.
    :param cut: last step for which ranks are computed.
    :return: dict mapping optimizer name to a float64 array of length
        ``cut + 1``; index 0 holds the mean of the ranks ``1..n`` for ``n``
        optimizers, index ``i`` the averaged rank at step ``i``.
    """
    bootstrap_samples = int(bootstrap_samples)
    optimizers = [entry[0] for entry in name_list]
    pickles = plot_util.load_pickles(name_list, trial_list)

    # Fixed seed keeps the bootstrap draws reproducible. Draws happen
    # sample by sample, optimizer by optimizer.
    rs = np.random.RandomState(1)
    combinations = []
    for _ in range(bootstrap_samples):
        run_counts = [len(pickles[name]) for name in optimizers]
        picked = [rs.randint(count) for count in run_counts]
        combinations.append(np.array(picked))

    rankings = dict()
    for optimizer in optimizers:
        trajectory = np.zeros((cut + 1, ), dtype=np.float64)
        # Before any step is evaluated every optimizer gets the mean rank.
        trajectory[0] = np.mean(range(1, len(optimizers) + 1))
        rankings[optimizer] = trajectory

    for step in range(1, cut + 1):
        num_products = 0
        for combination in combinations:
            best_values = [
                np.round(
                    plot_util.get_best(pickles[optimizers[idx]][number],
                                       step), 5)
                for idx, number in enumerate(combination)
            ]
            ranks = scipy.stats.rankdata(best_values)
            num_products += 1
            for position, optimizer in enumerate(optimizers):
                rankings[optimizer][step] += ranks[position]

        # Turn the accumulated rank sums into averages over all draws.
        for optimizer in optimizers:
            rankings[optimizer][step] = rankings[optimizer][step] / num_products

    return rankings
def calculate_rankings(trial_list, name_list, bootstrap_samples=500, cut=50):
    """Return the bootstrapped mean rank of every optimizer at each step.

    ``bootstrap_samples`` index combinations are drawn with a
    ``RandomState`` seeded with 1, one index per optimizer into that
    optimizer's loaded pickles. At each step ``i`` from 1 to ``cut`` the
    selected entries are ranked via ``scipy.stats.rankdata`` using
    ``plot_util.get_best(entry, i)`` rounded to five decimals; the ranks are
    summed over all combinations and divided by their number.

    :param trial_list: forwarded to ``plot_util.load_pickles``.
    :param name_list: sequence whose entries carry the optimizer name at
        index 0; also forwarded to ``plot_util.load_pickles``.
    :param bootstrap_samples: number of combinations to draw; converted
        with ``int()``.
    :param cut: last step for which ranks are computed.
    :return: dict mapping optimizer name to a float64 array of length
        ``cut + 1``; element 0 is the mean of ranks ``1..n`` for ``n``
        optimizers.
    """
    bootstrap_samples = int(bootstrap_samples)
    optimizers = [name[0] for name in name_list]
    pickles = plot_util.load_pickles(name_list, trial_list)
    rs = np.random.RandomState(1)

    def draw_combination():
        # One random index per optimizer, drawn in optimizer order.
        limits = [len(pickles[name]) for name in optimizers]
        return np.array([rs.randint(limit) for limit in limits])

    combinations = [draw_combination() for _ in range(bootstrap_samples)]

    rankings = dict()
    for optimizer in optimizers:
        rankings[optimizer] = np.zeros((cut+1,), dtype=np.float64)
        rankings[optimizer][0] = np.mean(range(1, len(optimizers) + 1))

    def ranks_at(combination, step):
        # Rank the selected entries of all optimizers at the given step.
        rounded = []
        for idx, number in enumerate(combination):
            trial = pickles[optimizers[idx]][number]
            rounded.append(np.round(plot_util.get_best(trial, step), 5))
        return scipy.stats.rankdata(rounded)

    for i in range(1, cut+1):
        num_products = 0
        for combination in combinations:
            ranks = ranks_at(combination, i)
            num_products += 1
            for j, optimizer in enumerate(optimizers):
                rankings[optimizer][i] += ranks[j]

        # Average the rank sums over the number of combinations.
        for optimizer in optimizers:
            rankings[optimizer][i] = rankings[optimizer][i] / num_products

    return rankings