def frac_free_data(filename):
    """Summarise the follow-up share of reclaimed memory for one data file.

    For every row of ``filename`` the subject-reclaimed and
    follow-up-reclaimed values are clamped at zero and expressed as
    fractions of their sum (both fractions are 0.0 when the sum is not
    positive).  Rows whose follow-up share exceeds 20% and which the
    garbage filter does not reject are printed, followed by their count.

    Returns the result of ``get_stats`` applied to the per-row follow-up
    percentages (scaled to 0-100).
    """
    size = hintgc_format.get_column(filename, hintgc_format.heap_size)
    hinted = hintgc_format.get_column(filename, hintgc_format.amount_hinted)
    caught = hintgc_format.get_column(filename, hintgc_format.subject_reclaimed)
    leaked = hintgc_format.get_column(filename, hintgc_format.followup_reclaimed)

    # Row layout: (heap size, amount hinted, subject reclaimed,
    # follow-up reclaimed).
    rows = zip(size, hinted, caught, leaked)

    def compute_row(row):
        """Return [subject share, follow-up share] of the reclaimed total."""
        # Negative readings are clamped to zero before forming the shares.
        sub = max(float(row[2]), 0)
        fol = max(float(row[3]), 0)
        total = sub + fol
        if total > 0:  # artificial limit to avoid meaningless data
            return [sub / total, fol / total]
        return [0.0, 0.0]

    def looks_like_garbage(item):
        """Filter applied before a high follow-up row is reported.

        True when something was hinted and the follow-up amount is at most
        100000000.  A previously disabled variant of this filter also
        required subject/hinted > 0.9 and used a cutoff of 30000.
        """
        row = item[1]
        return float(row[1]) != 0 and int(row[3]) <= 100000000

    # TODO: bin by size, then do that plot - closer to actually useful

    # Each entry pairs the computed shares with the raw row so the printed
    # details show both.
    percentages = [[compute_row(row), row] for row in rows]
    fol = [item[0][1] * 100 for item in percentages]

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    flagged = 0
    print("Details")
    for item in percentages:
        if item[0][1] * 100 > 20 and not looks_like_garbage(item):
            flagged += 1
            print(item)
    print(flagged)

    return get_stats(fol)
def one_bench_data(f):
    """Return ``[mean GC runtime, mean PF runtime, heap-size label]`` for f.

    ``f`` must be an existing results file.  The heap-size label is looked
    up from the mean of the file's heap-size column, so a mean that is not
    one of the known sizes raises ``KeyError``.  The flag in column 5 must
    be 0 for every row returned by the PF reader and 1 for every row
    returned by the GC reader.
    """
    assert os.path.exists(f)

    sizes = [int(v) for v in
             hintgc_format.get_column(f, hintgc_format.heap_size)]

    # The sizes are only _mostly_ equal - fragmentation (FIXME) - so they
    # are deliberately not checked with assert_all_equal.
    size_map = {
        33398784.0: "31.8 MB",
        24899584.0: "23.7 MB",
        92938240.0: "88.6 MB",
        193601536.0: "184.6 MB",
        59383808.0: "56.6 MB",
        780804096.0: "744.6 MB",
        27119616.0: "25.9 MB",
        29339648.0: "28 MB",
    }
    heap_size = size_map[numpy.mean(sizes)]

    def gc_flag(row):
        return int(row[5])

    def require_flag(reader, expected):
        # Every row from this reader must carry the same, expected flag.
        seen = reader(f, gc_flag)
        assert_all_equal(seen)
        assert seen[0] == expected

    require_flag(hintgc_format.get_column_pf, 0)
    require_flag(hintgc_format.get_column_gc, 1)

    pf_times = [int(t) for t in
                hintgc_format.get_column_pf(f, hintgc_format.runtime)]
    gc_times = [int(t) for t in
                hintgc_format.get_column_gc(f, hintgc_format.runtime)]

    gc = numpy.mean(gc_times)
    pf = numpy.mean(pf_times)
    return [gc, pf, heap_size]
def hist2d(gccsvfile, pfcsvfile, rootname, xselect):
    """Write gnuplot inputs comparing the GC and PF bins of one column.

    Creates ``rootname + ".dat"`` (one ``bin gc_count pf_count`` line per
    bin) and ``rootname + ".plt"`` (the gnuplot script), and appends the
    command that renders the plot to ``generated/commands.sh``.  Summary
    statistics for the selected column of each input file are printed via
    ``print_stats``.

    gccsvfile, pfcsvfile -- paths of the GC and PF result files.
    rootname -- path prefix shared by the generated files.
    xselect -- column selector handed to ``hintgc_format``; also the key
               into ``hintgc_format.labels`` for the axis label.

    Raises ValueError when either file yields no bins.
    """
    gcbins = hintgc_format.bin_column(gccsvfile, xselect)
    pfbins = hintgc_format.bin_column(pfcsvfile, xselect)

    print_stats(hintgc_format.get_column(gccsvfile, xselect))
    print_stats(hintgc_format.get_column(pfcsvfile, xselect))

    # Bin keys are strings holding integers (they are parsed with int() and
    # looked up again via str()).
    highest = max(max(int(k) for k in gcbins.keys()),
                  max(int(k) for k in pfbins.keys()))

    # Output always starts at bin 250 in steps of 10, independent of the
    # smallest bin present.  NOTE(review): the reason for 250 is not
    # visible here - confirm before changing.
    first_bin = 250

    # The context manager guarantees the data file is flushed and closed;
    # .get(..., 0) reports bins missing from either input as empty instead
    # of raising KeyError when the bins do not line up with this range.
    with open(rootname + ".dat", 'w') as ofile:
        for i in range(first_bin, highest + 1, 10):
            key = str(i)
            gcval = gcbins.get(key, 0)
            pfval = pfbins.get(key, 0)
            ofile.write(key + ' ' + str(gcval) + ' ' + str(pfval) + '\n')

    with open(rootname + ".plt", 'w') as pltfile:
        pltfile.write("set term epslatex\n")
        pltfile.write("set output '"+rootname+".tex'\n")
        pltfile.write("set ylabel \""+hintgc_format.labels[xselect]+"\"\n")
        pltfile.write("set xlabel \"Count\"\n")
        pltfile.write("set yrange [*:*]\n")
        pltfile.write("plot \""+rootname+".dat\" using 2:1 title \"GC\" lt rgb \"red\", \""+rootname+".dat\" using 3:1 title \"PF\" lt rgb \"blue\"\n")

    # The rendering commands are queued rather than run here.
    with open("generated/commands.sh", "a") as cmds:
        cmds.write("gnuplot "+rootname+".plt && epstopdf "+rootname+".eps\n")
def format_time(num):
    """Format ``num`` with two decimals, padding values below one.

    ``num`` is converted with ``float()``.  When the formatted text begins
    with ``"0."`` a single leading space is added.
    """
    text = '%2.2f' % float(num)
    return " " + text if text.startswith("0.") else text


rows = []
for mb in files:
    def gc_flag(row):
        return int(row[5])

    pff = input_location + mb[1]

    flags = hintgc_format.get_column(pff, gc_flag)
    assert_all_equal(flags);
    assert flags[0] == 0
#    flags = hintgc_format.get_column(gcf, gc_flag)
#    assert_all_equal(flags);
#    assert flags[0] == 1

    def get_stats(data):
        data = [float(x) for x in data]
        return [len(data), min(data), max(data), format_time(sum(data)/len(data)),
                scipy.stats.scoreatpercentile(data, 50),
                #scipy.stats.scoreatpercentile(data, 95),
                #scipy.stats.scoreatpercentile(data, 90),
                #scipy.stats.scoreatpercentile(data, 99),
                #scipy.stats.scoreatpercentile(data, 99.9),
                #scipy.stats.scoreatpercentile(data, 99.95),
    return '%.2f' % num; 

def format_int(num):
    """Return ``num`` as integer text.

    ``num`` is first converted with ``float()`` and then rendered with
    ``%d``, which drops any fractional part.
    """
    return '%d' % float(num)
    

rows = []
# Disabled block (never runs because of `if False`): totals for the two
# clang build result files named below.
if False:
    # Extracts the integer flag stored in column 5 of a row.
    def gc_flag(row):
        return int(row[5])
    # these data files are not mixed
    pff = "../benchmarks/results/clang_build_pf_stats-release.csv"
    gcf = "../benchmarks/results/clang_build_gc_stats-release.csv"
    # The first row of the PF file must carry flag 0 and the first row of
    # the GC file flag 1; assert_all_equal presumably checks that the
    # remaining rows agree - verify against its definition.
    flags = hintgc_format.get_column(pff, gc_flag)
    assert_all_equal(flags);
    assert flags[0] == 0
    flags = hintgc_format.get_column(gcf, gc_flag)
    assert_all_equal(flags);
    assert flags[0] == 1

    # Sum of the subject_reclaimed column of the PF file.
    scratch  = hintgc_format.get_column(pff, hintgc_format.subject_reclaimed)
    scratch = [int(x) for x in scratch]
    reclaimed = sum(scratch)

    # Sum of the amount_hinted column of the PF file; the per-row values
    # are kept in hinted_a.
    scratch  = hintgc_format.get_column(pff, hintgc_format.amount_hinted)
    scratch = [int(x) for x in scratch]
    hinted = sum(scratch)
    hinted_a = scratch
# Command line: argv[1] is the input location, which must exist; it is later
# used as a string prefix for data file names.
input_location = sys.argv[1];
import os
assert os.path.exists(input_location)    

# argv[2] selects the configuration and must be one of the names below.
config = sys.argv[2]
assert config in ["all", "basic", "opts"]

rows = []
for mb in files:
    if os.path.exists(input_location + "edge-filtered-header/"):
        f = input_location + "edge-filtered-header/" + mb[1]
    else:
        f = input_location + mb[1]
    assert os.path.exists(f)
    #print mb
    heap_size = hintgc_format.get_column(f, hintgc_format.heap_size)
    heap_size = [int(x) for x in heap_size]

    #print heap_size
    # they're only _mostly_ equal - fragmentation (FIXME)
    #assert_all_equal(heap_size)

    size_map = { 33398784.0 : "31.8 MB", 24899584.0 : "23.7 MB", 92938240.0 : "88.6 MB", 193601536.0 : "184.6 MB", 59383808.0 : "56.6 MB", 780804096.0 : "744.6 MB", 27119616.0 : "25.9 MB", 29339648.0 : "28 MB" }

    heap_size = size_map[numpy.mean(heap_size)]
    

    def gc_flag(row):
        return int(row[5])

    flags = hintgc_format.get_column_pf(f, gc_flag)
    while num >= 1000:
        num = num / 1000
        p = p + 1
    return ("%.2f" % num) + " " + units[p]

    
rows = []
for mb in files:
    def gc_flag(row):
        return int(row[5])


    # these data files are not mixed
    pff = input_location + mb[1]
    #gcf = "../benchmarks/results/spec-test-gc/" + mb[1]
    flags = hintgc_format.get_column(pff, gc_flag)
    assert_all_equal(flags);
    assert flags[0] == 0
#    flags = hintgc_format.get_column(gcf, gc_flag)
#    assert_all_equal(flags);
#    assert flags[0] == 1

    scratch  = hintgc_format.get_column(pff, hintgc_format.subject_reclaimed)
    scratch = [int(x) for x in scratch]
    reclaimed = sum(scratch)

    scratch  = hintgc_format.get_column(pff, hintgc_format.amount_hinted)
    scratch = [int(x) for x in scratch]
    hinted = sum(scratch)
    hinted_a = scratch