def frac_free_data(filename):
    """Summarise the follow-up share of reclaimed memory for one results file.

    Reads the ``heap_size``, ``amount_hinted``, ``subject_reclaimed`` and
    ``followup_reclaimed`` columns of ``filename`` through ``hintgc_format``.
    For every row the two reclaimed values (negative readings clamped to
    zero) are converted into fractions of their sum.  Rows whose follow-up
    share exceeds 20% and that are not filtered out by
    ``looks_like_garbage`` are printed, followed by how many there were.

    Returns the result of ``get_stats`` applied to the per-row follow-up
    percentages (0-100).
    """
    size = hintgc_format.get_column(filename, hintgc_format.heap_size)
    hinted = hintgc_format.get_column(filename, hintgc_format.amount_hinted)
    caught = hintgc_format.get_column(filename, hintgc_format.subject_reclaimed)
    leaked = hintgc_format.get_column(filename, hintgc_format.followup_reclaimed)

    def compute_row(row):
        """Return [subject share, follow-up share] for one (size, hinted, caught, leaked) row."""
        sub = max(float(row[2]), 0)
        fol = max(float(row[3]), 0)
        total = sub + fol
        if total > 0:
            return [sub / total, fol / total]
        # Nothing was reclaimed at all: report zero shares instead of
        # dividing by zero.
        return [0.0, 0.0]

    def looks_like_garbage(item):
        """Return True when the row has a non-zero hinted amount and its
        follow-up reclaimed value is at most 100000000.

        An earlier, disabled variant of this filter instead required
        caught/hinted > 0.9 and a follow-up value of at most 30000.
        """
        raw = item[1]
        return float(raw[1]) != 0 and int(raw[3]) <= 100000000

    # TODO: bin by size, then do that plot - closer to actually useful
    percentages = [[compute_row(row), row]
                   for row in zip(size, hinted, caught, leaked)]
    fol = [entry[0][1] * 100 for entry in percentages]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    c = 0
    print("Details")
    for item in percentages:
        if item[0][1] * 100 > 20 and not looks_like_garbage(item):
            c += 1
            print(item)
    print(c)

    return get_stats(fol)
def one_bench_data(f):
    """Return ``[mean GC runtime, mean PF runtime, heap size label]`` for one file.

    ``f`` is the path of a results file readable by ``hintgc_format``.  The
    file must exist, every row selected by ``get_column_pf`` must carry
    flag 0 in column 5, and every row selected by ``get_column_gc`` must
    carry flag 1; these conditions are checked with assertions.

    The heap size label is looked up from the mean of the heap_size column.
    When that mean is not one of the known sizes, a label is computed from
    the mean instead of raising KeyError.
    """
    assert os.path.exists(f)

    heap_size = [int(x) for x in
                 hintgc_format.get_column(f, hintgc_format.heap_size)]
    # The per-row heap sizes are only _mostly_ equal (fragmentation, FIXME),
    # so they are averaged rather than asserted equal.
    size_map = {
        33398784.0: "31.8 MB",
        24899584.0: "23.7 MB",
        92938240.0: "88.6 MB",
        193601536.0: "184.6 MB",
        59383808.0: "56.6 MB",
        780804096.0: "744.6 MB",
        27119616.0: "25.9 MB",
        29339648.0: "28 MB",
    }
    mean_size = numpy.mean(heap_size)
    heap_label = size_map.get(mean_size)
    if heap_label is None:
        # Exact float-key lookup misses whenever the rows differ slightly or
        # a new heap size shows up.  The table entries correspond to
        # bytes / 2**20, so derive the label the same way.
        heap_label = "%.1f MB" % (mean_size / (1024.0 * 1024.0))

    def gc_flag(row):
        return int(row[5])

    flags = hintgc_format.get_column_pf(f, gc_flag)
    assert_all_equal(flags)
    assert flags[0] == 0

    flags = hintgc_format.get_column_gc(f, gc_flag)
    assert_all_equal(flags)
    assert flags[0] == 1

    pfruntime = [int(x) for x in
                 hintgc_format.get_column_pf(f, hintgc_format.runtime)]
    gcruntime = [int(x) for x in
                 hintgc_format.get_column_gc(f, hintgc_format.runtime)]

    gc_mean = numpy.mean(gcruntime)
    pf_mean = numpy.mean(pfruntime)
    return [gc_mean, pf_mean, heap_label]
def hist2d(gccsvfile, pfcsvfile, rootname, xselect):
    """Write gnuplot data and script files comparing two binned columns.

    ``xselect`` picks the column; it is passed to ``hintgc_format.bin_column``
    and ``hintgc_format.get_column`` and is also used to index
    ``hintgc_format.labels``.  Statistics for the column of each input file
    are printed through ``print_stats``.

    Files produced:
      * ``<rootname>.dat`` - one line ``<bin> <gc count> <pf count>`` for
        every bin from 250 up to the highest bin seen, in steps of 10; bins
        missing from either input are written as 0.
      * ``<rootname>.plt`` - the gnuplot script that plots the .dat file.
      * ``generated/commands.sh`` - one appended line that runs gnuplot and
        epstopdf for this plot.

    Raises ValueError if either file yields no bins.
    """
    gcbins = hintgc_format.bin_column(gccsvfile, xselect)
    pfbins = hintgc_format.bin_column(pfcsvfile, xselect)

    print_stats(hintgc_format.get_column(gccsvfile, xselect))
    print_stats(hintgc_format.get_column(pfcsvfile, xselect))

    # Bin keys are strings holding integers; compare them numerically.
    highest = max(max(int(k) for k in gcbins.keys()),
                  max(int(k) for k in pfbins.keys()))

    first_bin = 250   # output deliberately starts here, not at the lowest bin
    bin_step = 10

    # 'with' guarantees the data file is flushed and closed.  Missing bins
    # default to 0 on lookup, so a lowest bin above 250 (or off the 10-step
    # grid) can no longer cause a KeyError.
    with open(rootname + ".dat", 'w') as ofile:
        for i in range(first_bin, highest + 1, bin_step):
            key = str(i)
            ofile.write("%s %s %s\n" % (key, gcbins.get(key, 0), pfbins.get(key, 0)))

    datafile = rootname + ".dat"
    with open(rootname + ".plt", 'w') as pltfile:
        pltfile.write("set term epslatex\n")
        pltfile.write("set output '" + rootname + ".tex'\n")
        pltfile.write('set ylabel "' + hintgc_format.labels[xselect] + '"\n')
        pltfile.write('set xlabel "Count"\n')
        pltfile.write("set yrange [*:*]\n")
        # Counts go on the x axis (columns 2 and 3), bin values on the y axis.
        pltfile.write('plot "' + datafile + '" using 2:1 title "GC" lt rgb "red", '
                      '"' + datafile + '" using 3:1 title "PF" lt rgb "blue"\n')

    with open("generated/commands.sh", "a") as cmds:
        cmds.write("gnuplot " + rootname + ".plt && epstopdf " + rootname + ".eps\n")
def format_time(num): num = float(num) s = '%2.2f' % num; if s.startswith("0."): s = " "+s return s rows = [] for mb in files: def gc_flag(row): return int(row[5]) pff = input_location + mb[1] flags = hintgc_format.get_column(pff, gc_flag) assert_all_equal(flags); assert flags[0] == 0 # flags = hintgc_format.get_column(gcf, gc_flag) # assert_all_equal(flags); # assert flags[0] == 1 def get_stats(data): data = [float(x) for x in data] return [len(data), min(data), max(data), format_time(sum(data)/len(data)), scipy.stats.scoreatpercentile(data, 50), #scipy.stats.scoreatpercentile(data, 95), #scipy.stats.scoreatpercentile(data, 90), #scipy.stats.scoreatpercentile(data, 99), #scipy.stats.scoreatpercentile(data, 99.9), #scipy.stats.scoreatpercentile(data, 99.95),
return '%.2f' % num; def format_int(num): num = float(num) s = '%d' % num; return s rows = [] if False: def gc_flag(row): return int(row[5]) # these data files are not mixed pff = "../benchmarks/results/clang_build_pf_stats-release.csv" gcf = "../benchmarks/results/clang_build_gc_stats-release.csv" flags = hintgc_format.get_column(pff, gc_flag) assert_all_equal(flags); assert flags[0] == 0 flags = hintgc_format.get_column(gcf, gc_flag) assert_all_equal(flags); assert flags[0] == 1 scratch = hintgc_format.get_column(pff, hintgc_format.subject_reclaimed) scratch = [int(x) for x in scratch] reclaimed = sum(scratch) scratch = hintgc_format.get_column(pff, hintgc_format.amount_hinted) scratch = [int(x) for x in scratch] hinted = sum(scratch) hinted_a = scratch
input_location = sys.argv[1]; import os assert os.path.exists(input_location) config = sys.argv[2] assert config in ["all", "basic", "opts"] rows = [] for mb in files: if os.path.exists(input_location + "edge-filtered-header/"): f = input_location + "edge-filtered-header/" + mb[1] else: f = input_location + mb[1] assert os.path.exists(f) #print mb heap_size = hintgc_format.get_column(f, hintgc_format.heap_size) heap_size = [int(x) for x in heap_size] #print heap_size # they're only _mostly_ equal - fragementation (FIXME) #assert_all_equal(heap_size) size_map = { 33398784.0 : "31.8 MB", 24899584.0 : "23.7 MB", 92938240.0 : "88.6 MB", 193601536.0 : "184.6 MB", 59383808.0 : "56.6 MB", 780804096.0 : "744.6 MB", 27119616.0 : "25.9 MB", 29339648.0 : "28 MB" } heap_size = size_map[numpy.mean(heap_size)] def gc_flag(row): return int(row[5]) flags = hintgc_format.get_column_pf(f, gc_flag)
while num >= 1000: num = num / 1000 p = p + 1 return ("%.2f" % num) + " " + units[p] rows = [] for mb in files: def gc_flag(row): return int(row[5]) # these data files are not mixed pff = input_location + mb[1] #gcf = "../benchmarks/results/spec-test-gc/" + mb[1] flags = hintgc_format.get_column(pff, gc_flag) assert_all_equal(flags); assert flags[0] == 0 # flags = hintgc_format.get_column(gcf, gc_flag) # assert_all_equal(flags); # assert flags[0] == 1 scratch = hintgc_format.get_column(pff, hintgc_format.subject_reclaimed) scratch = [int(x) for x in scratch] reclaimed = sum(scratch) scratch = hintgc_format.get_column(pff, hintgc_format.amount_hinted) scratch = [int(x) for x in scratch] hinted = sum(scratch) hinted_a = scratch