def better(new,old):
  """Compare two result collectors and report three boolean verdicts.

  Returns a tuple ``(betterMed, same, betterIqr)``:

  * ``betterMed`` -- outcome of the median comparison.  When ``new.lessp``
    is truthy the test is ``new median >= old median``; otherwise it is
    ``new median <= old median``.
  * ``same``      -- True when the ``a12`` score of the two caches does not
    exceed the threshold ``The.misc.a12``.  The cache order passed to
    ``a12`` is (old, new) when ``new.lessp`` is truthy, else (new, old).
  * ``betterIqr`` -- True when ``new`` has a strictly smaller iqr than ``old``.

  The three flags are returned separately; combining them into a single
  "better" decision is left to the caller.
  """
  threshold = The.misc.a12
  iqr_improved = new.has().iqr < old.has().iqr
  # Both branches perform the same two tests with the operands swapped, so
  # pick the operand order once and run the tests a single time.
  if new.lessp:
    lhs, rhs = old, new
  else:
    lhs, rhs = new, old
  median_improved = lhs.has().median <= rhs.has().median
  indistinguishable = a12(lhs._cache, rhs._cache) <= threshold
  return median_improved, indistinguishable, iqr_improved
def effect_size(sapienzdiv_data, sapienz_data, property, app):
    """Print and return the effect size between two result samples.

    Args:
        sapienzdiv_data: sample of measurements for SAPIENZDIV.
        sapienz_data: sample of measurements for SAPIENZ.
        property: one of ``ids.UNIQUE_CRASHES``, ``ids.MAX_COVERAGE``,
            ``ids.AVG_MIN_LENGTH`` or ``ids.EXEC_TIME``.  It is concatenated
            into the printed message, so it must be a string.
        app: label of the app under test; only used in the printed message.

    Returns:
        The ``a12`` effect size rounded to two decimals.

    Raises:
        ValueError: if ``property`` is not one of the four supported ids.
    """
    # Maximised objectives are compared with rev=True, minimised ones with
    # rev=False.
    if property in (ids.UNIQUE_CRASHES, ids.MAX_COVERAGE):
        rev = True
    elif property in (ids.AVG_MIN_LENGTH, ids.EXEC_TIME):
        rev = False
    else:
        # Without this branch `rev` would be unbound and the call below would
        # fail with an opaque UnboundLocalError.
        raise ValueError("effect_size: unsupported property " + repr(property))

    # Primary effect size (a12 implementation "by Tim").
    es_tim_round = round(a12(sapienzdiv_data, sapienz_data, rev), 2)

    # Cross-check with the alternative VD_A implementation, which is only
    # consulted when both samples have the same length.
    if len(sapienzdiv_data) == len(sapienz_data):
        if rev:
            es_alt = VD_A(sapienzdiv_data, sapienz_data)
        else:
            es_alt = VD_A(sapienz_data, sapienzdiv_data)
        es_alt_round = round(es_alt[0], 2)
    else:
        es_alt_round = "N/A"

    print("Effect size SAPIENZ-SAPIENZDIV for property " + property + ": "
          + str(es_tim_round) + " / " + str(es_alt_round)
          + "\t(app: " + str(app) + ")")
    return es_tim_round
def _read_summary_columns(filename, num_objectives):
    """Read one algorithm's summary CSV into ``num_objectives + 4`` column lists.

    Column 0 collects ``int(row[3])``; columns ``1 .. num_objectives + 3``
    collect ``float(row[4])`` .. ``float(row[6 + num_objectives])`` in order.
    """
    columns = [[] for _ in range(num_objectives + 4)]
    # 'rb' is the mode the csv module expects under Python 2.
    with open(filename, 'rb') as finput:
        for row in csv.reader(finput, delimiter=','):
            columns[0].append(int(row[3]))
            for offset in range(num_objectives + 3):
                columns[1 + offset].append(float(row[4 + offset]))
    return columns


def _dense_ranks(scores):
    """Return 1-based ranks for ``scores``; highest score is #1, ties share a rank."""
    ordered = sorted(set(scores), reverse=True)
    position = dict((score, idx + 1) for idx, score in enumerate(ordered))
    return [position[score] for score in scores]


def joes_stats_reporter(problems, algorithms, tag=""):
    """Write a statistical comparison report of ``algorithms`` on ``problems``.

    For every problem the per-algorithm summary CSV files are read, a baseline
    line is emitted from the problem's dataset file, and the algorithms are
    compared pairwise on the column at index ``1 + len(problem.objectives)``
    using ``stats.ranksums`` (alpha = 0.05), the ``a12`` effect size, and the
    average per-run ranks against ``critical_difference(0.01, ...)``.
    Algorithms are then ranked by number of pairwise wins under each test.
    After all problems, the average rank of every algorithm is reported.

    Output goes to stdout and to
    ``reports/<mm-dd-YYYY>/stats_suite_summary_report_<tag><NN>.txt`` where
    ``NN`` is one more than the number of entries already in that folder.

    Args:
        problems: objects exposing ``name``, ``decisions`` and ``objectives``.
        algorithms: objects exposing ``name``.  Any number >= 1 is accepted;
            every pair is compared.
        tag: optional text inserted into the report file name.
    """
    from itertools import combinations

    # Report folder is named after today's date; create it on first use.
    report_dir = 'reports/' + strftime("%m-%d-%Y")
    if not os.path.isdir(report_dir):
        os.makedirs(report_dir)

    # Sequence number = entries already present in the folder, plus one.
    fignum = len(os.listdir(report_dir)) + 1
    report_path = (report_dir + "/stats_suite_summary_report" + "_" + tag
                   + str("%02d" % fignum) + ".txt")

    num_algs = len(algorithms)
    pairs = list(combinations(range(num_algs), 2))
    alpha = 0.05
    dash = '{0: >12}'.format("-")

    # One list of per-problem ranks for each algorithm.
    algranks = [[] for _ in algorithms]
    nemalgranks = [[] for _ in algorithms]

    with open(report_path, 'w') as fa:
        for problem in problems:
            num_obj = len(problem.objectives)
            problem_key = (problem.name + "-p" + str(MU)
                           + "-d" + str(len(problem.decisions))
                           + "-o" + str(num_obj))

            # --- per-algorithm summaries -------------------------------
            columns_by_alg = []
            reports = []
            for algorithm in algorithms:
                filename = (DATA_PREFIX + SUMMARY_RESULTS + problem_key
                            + "_" + algorithm.name + DATA_SUFFIX)
                columns = _read_summary_columns(filename, num_obj)
                columns_by_alg.append(columns)

                s = ('{0: <12}'.format(problem_key) + ","
                     + '{0: <12}'.format(algorithm.name) + ",")
                s += str(avg(columns[0])) + ","
                for column in columns:
                    s += str("%10.2f" % avg(column)) + ","
                reports.append(s)

            # --- baseline line -----------------------------------------
            baseline_name = "data/" + problem_key + "-dataset.txt"
            s = ('{0: <12}'.format(problem.name) + ","
                 + '{0: <12}'.format("Baseline") + ","
                 + str("%10.2f" % 0) + ",0,")
            with open(baseline_name, 'rb') as f2input:
                for i, row in enumerate(csv.reader(f2input, delimiter=',')):
                    # Only rows after index MU contribute to the baseline.
                    if i > MU:
                        s += str("%10.2f" % float(row[1])) + ","
            s += str("%10.2f" % 1) + "," + str("%10.2f" % 0)
            print(s)
            fa.write(s + "\n")

            # --- statistics --------------------------------------------
            IBDs = [columns[1 + num_obj] for columns in columns_by_alg]
            AVGs = [avg(ibd) for ibd in IBDs]
            cd = critical_difference(0.01, num_algs, repeats)

            # Rank the algorithms within each run, then average per algorithm.
            per_run_ranks = [
                stats.rankdata(numpy.array([group[i] for group in IBDs]))
                for i in range(len(IBDs[0]))
            ]
            rankavgs = [avg([run[g] for run in per_run_ranks])
                        for g in range(num_algs)]

            wins = [0] * num_algs
            nemwins = [0] * num_algs
            effect = {}
            for i, j in pairs:
                _, p_value = stats.ranksums(IBDs[i], IBDs[j])
                effect[(i, j)] = a12(IBDs[i], IBDs[j])
                # Rank-sum test: the lower average takes the win.
                if p_value < alpha:
                    wins[i if AVGs[i] < AVGs[j] else j] += 1
                # Critical-difference test on average ranks.
                if abs(rankavgs[i] - rankavgs[j]) > cd:
                    nemwins[i if rankavgs[i] < rankavgs[j] else j] += 1

            # Effect-size matrix rows: a12 above the diagonal, its complement
            # below, "-" on the diagonal.
            a12mat = []
            for i in range(num_algs):
                cells = []
                for j in range(num_algs):
                    if i == j:
                        cells.append(dash)
                    elif i < j:
                        cells.append(str("%12.2f" % effect[(i, j)]))
                    else:
                        cells.append(str("%12.2f" % (1 - effect[(j, i)])))
                a12mat.append(",".join(cells))

            # The two rankings are independent: one from rank-sum wins, the
            # other from critical-difference wins.
            rank_of = _dense_ranks(wins)
            nemrank_of = _dense_ranks(nemwins)
            for idx in range(num_algs):
                algranks[idx].append(rank_of[idx])
                nemalgranks[idx].append(nemrank_of[idx])
            rank_reports = ["Rank" + ": #" + str(r) for r in rank_of]
            nemrank_reports = ["nemRank" + ": #" + str(r) for r in nemrank_of]
            nemenyi = [str(ra) + "," + str(cd) for ra in rankavgs]

            for dig, dug, dag, dog, deg in zip(reports, rank_reports, a12mat,
                                               nemenyi, nemrank_reports):
                print(" ".join([dig, dug, ",", dag, ", ", deg]))
                # NOTE(review): the fields are written back-to-back with no
                # separator between them; kept as-is so the report format is
                # unchanged -- confirm whether delimiters were intended.
                fa.write(dig + dug + dag + dog + deg + "\n")

        # --- averages over all problems --------------------------------
        for heading, rank_lists in (("===Average Rank===", algranks),
                                    ("===Average nemRank===", nemalgranks)):
            print(heading)
            fa.write(heading + "\n")
            for rk, alg in zip(rank_lists, algorithms):
                line = alg.name + ":" + str("%12.2f" % avg(rk))
                print(line)
                fa.write(line + "\n")