def t_test(name):
    """Print a one-line DAG-vs-tree comparison row for experiment ``name``.

    Reads the "compare" CSV for the "dag" and the "tree" structure via
    ``experiment.csv_path`` / ``experiment.read_csv`` and prints, on a
    single line:

    * a label made of at most the first 8 characters of ``name``,
    * the mean ``tst_llh`` of the DAG rows and of the tree rows,
    * whether the DAG mean ``tst_llh`` is greater than the tree mean,
    * the mean ``num_nodes`` (truncated to int) of each and their ratio,
    * the mean ``time`` of each and their ratio,
    * whether the unequal-variance t-test (``equal_var=False``) on the two
      ``tst_llh`` samples yields p < 0.05.

    Args:
        name: Experiment name forwarded to ``experiment.csv_path``.

    Returns:
        None. The row is written to standard output.
    """
    dag_rows = experiment.read_csv(experiment.csv_path(name, "dag", "compare"))
    tree_rows = experiment.read_csv(experiment.csv_path(name, "tree", "compare"))

    dag_llh = [float(row["tst_llh"]) for row in dag_rows]
    tree_llh = [float(row["tst_llh"]) for row in tree_rows]

    # Each mean is needed several times below; compute it once.
    dag_llh_mean = np.mean(dag_llh)
    tree_llh_mean = np.mean(tree_llh)
    dag_nodes_mean = np.mean([int(row["num_nodes"]) for row in dag_rows])
    tree_nodes_mean = np.mean([int(row["num_nodes"]) for row in tree_rows])
    dag_time_mean = np.mean([float(row["time"]) for row in dag_rows])
    tree_time_mean = np.mean([float(row["time"]) for row in tree_rows])

    # Kept as a function-local import so scipy is only needed when this
    # report is produced.
    from scipy import stats

    # Index 1 of the ttest_ind result is the p-value.
    p_value = stats.ttest_ind(dag_llh, tree_llh, equal_var=False)[1]

    # The separators are spelled out explicitly to reproduce the layout of
    # the former Python 2 print statements: one space between items, except
    # directly after an item that ends in a tab.
    fields = [
        # NOTE(review): only a single space is appended before truncating,
        # so short names are not padded to 8 columns -- confirm whether wider
        # padding was intended.
        (name + " ")[:8],
        " \t%.4f" % dag_llh_mean,
        " \t%.4f" % tree_llh_mean,
        " \t%s" % (dag_llh_mean > tree_llh_mean),
        " \t\t%6d" % int(dag_nodes_mean),
        " %7d" % int(tree_nodes_mean),
        " \t%.4f" % (dag_nodes_mean / tree_nodes_mean),
        " \t%8.2f" % dag_time_mean,
        "  %8.2f" % tree_time_mean,
        " %s" % (dag_time_mean / tree_time_mean),
        " \t%s" % (p_value < 0.05),
    ]

    # A single parenthesised string argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("".join(fields))
def best_grid_point(name, structure, dirname):
    """Return ``best_row`` applied to the rows of one results CSV.

    The CSV is located with ``experiment.csv_path(name, structure, dirname)``
    and loaded with ``experiment.read_csv``.

    Args:
        name: First argument forwarded to ``experiment.csv_path``.
        structure: Second argument forwarded to ``experiment.csv_path``.
        dirname: Third argument forwarded to ``experiment.csv_path``.

    Returns:
        Whatever ``best_row`` returns for the loaded rows.
    """
    csv_file = experiment.csv_path(name, structure, dirname)
    return best_row(experiment.read_csv(csv_file))