Example #1
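The examples below are plotting functions excerpted from a larger analysis module, so their module-level imports are not shown. A plausible shared preamble, inferred from the calls they make, is sketched here; Experiment, ExperimentStats, and helpers such as the do_batch_len_time* family and size_time_plot come from the surrounding project, and humansorted is assumed to be the natsort function.

# Assumed module-level imports (a sketch, not part of the original snippets).
import argparse
import itertools
import statistics
from os.path import basename, dirname, splitext

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from natsort import humansorted  # assumption: humansorted comes from natsort

# Experiment, ExperimentStats, size_time_plot, pages_per_vablock_per_batch and
# the do_batch_len_time* helpers are defined elsewhere in the project.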
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', type=str, help='Full path to CSV file')
    args = parser.parse_args()
    c = args.csv
    m = "*"

    if ".txt" not in args.csv:
        print("Suspicious input file with no/wrong extension:", args.csv)
        exit(1)

    e = Experiment(c)

    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000
    fig = plt.figure()
    #fig = plt.gcf()
    ax = fig.add_subplot(1, 1, 1)

    faults = sorted(faults, key=lambda f: f.timestamp)
    read = []
    rx = []
    write = []
    wx = []
    for i, f in enumerate(faults):
        if f.access_type == "r":
            read.append(f.fault_address)
            rx.append(i)
        elif f.access_type == "w":
            write.append(f.fault_address)
            wx.append(i)
        else:
            print("unaccounted for access type:", faults.access_type)

    #print(ranges)
    ax.hlines(ranges, 0, len(faults))
    ax.plot(rx, read, 'b' + m, markersize=1, label="read")
    ax.plot(wx, write, 'r' + m, markersize=1, label="write")

    #fig.set_size_inches(16, 10)

    plt.legend()
    ax.set_xlabel('Fault Occurrence')
    ax.set_ylabel('Fault Address')
    psize = dirname(args.csv).split("_")[1]
    figname = splitext(basename(
        args.csv))[0].split("_")[0] + "-" + psize + "-time.png"

    ylabels = map(lambda t: '0x%013X' % int(t), ax.get_yticks())
    ax.set_yticklabels(ylabels)
    plt.tight_layout()

    print('saving figure:', figname)
    fig.savefig(figname, dpi=500)
    plt.close(fig)
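
The function above (and most of the examples that follow) unpacks Experiment.get_raw_data() and reads a few attributes off each fault record. The minimal interface sketch below is inferred from usage only; it is not the project's actual class definitions.

# Interface sketch, inferred from how the examples use the objects.
from dataclasses import dataclass

@dataclass
class FaultRecordSketch:
    fault_address: int   # faulting virtual address (plotted on the y axis)
    timestamp: int       # arrival time in the fault buffer, in ns
    access_type: str     # "r" read, "w" write, "p" prefetch, "d" discard
    utlb_id: int         # uTLB that reported the fault
    client_id: int       # client within that uTLB

# Experiment(csv).get_raw_data() is unpacked as
#     ranges, faults, batches, num_batches = e.get_raw_data()
# where ranges holds the cudaMallocManaged() allocation addresses, faults is a
# flat list of fault records, and batches groups those faults per fault-buffer
# batch.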
Example #2
def parse_bsize_sub(csv):
    e = Experiment(csv)
    size = None
    if "pf" in csv:
        size = basename(dirname(csv)).split("_")[2]
    else:
        size = basename(dirname(csv)).split("_")[1]
    return (e, size)
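
The split("_") indexing in parse_bsize_sub assumes run directories whose names encode a size field, with an extra "pf" component for prefetch runs. Hypothetical paths consistent with that parsing (the actual naming convention comes from the experiment harness and is not shown here):

from os.path import basename, dirname

# Illustrative paths only; the real directory names come from the harness.
csv_plain = "/data/cublas_4096/cublas_0.txt"     # <app>_<size>/<file>.txt
csv_pf = "/data/cublas_pf_4096/cublas_0.txt"     # <app>_pf_<size>/<file>.txt

print(basename(dirname(csv_plain)).split("_"))   # ['cublas', '4096']       -> size at index 1
print(basename(dirname(csv_pf)).split("_"))      # ['cublas', 'pf', '4096'] -> size at index 2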
Example #3
def parse_bsize(t):
    bsize, lis = t
    x = []
    y = []
    for csv in humansorted(lis):
        e = Experiment(csv)
        size = None
        total_dups = None
        if "pf" in csv:
            size = basename(dirname(csv)).split("_")[2]
            total_dups = sum(e.get_duplicate_faults_64k())
        else:
            size = basename(dirname(csv)).split("_")[1]
            total_dups = sum(e.get_duplicate_faults_4k())
        x.append(size)
        y.append(total_dups)
    return (x, y, bsize)
Example #4
def parse_bsize_sub2(t):
    bsize, csv = t
    e = Experiment(csv)
    size = None
    if "pf" in csv:
        size = basename(dirname(csv)).split("_")[2]
    else:
        size = basename(dirname(csv)).split("_")[1]
    app = splitext(basename(csv))[0].split("_")[0]
    return ExperimentStats(e, size, bsize, app)
Example #5
def per_app(c):

    e = Experiment(c)

    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000

    x0, y0 = do_batch_len_time(batches, faults, c)
    x1, y1 = do_batch_len_time_4k(batches, faults, c)
    x2, y2 = do_batch_len_time_64k(batches, faults, c)
    x3, y3 = do_batch_len_time_2m(batches, faults, c)
    do_all(batches, faults, c, x0, x1, x2, x3, y0, y1, y2, y3)
    x4, y4 = do_batch_len_time_transfers_4k(batches, faults, c)
    x5, y5 = do_batch_len_time_transfers_64k(batches, faults, c)
    do_all_more(batches, faults, c, x0, x1, x2, x3, x4, x5, y0, y1, y2, y3, y4,
                y5)
    print(c)
    pages_per_vablock_per_batch(batches, faults)
Example #6
def parse_bsize(t):
    bsize, lis = t
    es = []
    for csv in humansorted(lis):
        e = Experiment(csv)
        size = None
        if "pf" in csv:
            size = basename(dirname(csv)).split("_")[2]
        else:
            size = basename(dirname(csv)).split("_")[1]
        es.append(ExperimentStats(e, size, bsize))
    return (es, bsize)
Example #7
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', type=str, help='Full path to CSV file')
    args = parser.parse_args()
    c = args.csv
    m = "*"

    if ".txt" not in args.csv:
        print("Suspicious input file with no/wrong extension:", args.csv)
        exit(1)
    
    e = Experiment(c)
    
    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000
    fig = plt.figure()
    #fig = plt.gcf()
    ax = fig.add_subplot(1, 1, 1)
    
    sm_ids = sorted(e.count_utlb_client_pairs().keys())
    cmap = plt.get_cmap('jet')
    colors = cmap(np.linspace(0, 1.0, len(sm_ids)))
    
    id_f_map = {sm_id:[] for sm_id in sm_ids}
    id_fx_map = {sm_id:[] for sm_id in sm_ids}
    x = 1
    for fault in faults:
        mapping = (fault.utlb_id, fault.client_id)
        id_f_map[mapping].append(fault.fault_address)
        id_fx_map[mapping].append(x)
        x += 1
        
    #print(ranges)
    ax.hlines(ranges, 0, len(faults))

    for i, (sm_id, color) in enumerate(zip(sm_ids, colors)):
        #ax.scatter(id_fx_map[sm_id], id_f_map[sm_id] , color=color, label=str(sm_id), s=.01)
        ax.plot(id_fx_map[sm_id], id_f_map[sm_id], color=color,
                label=str(sm_id), linestyle="None", markersize=1, marker=",")
        #if i == 0:
        #    break
    

    #fig.set_size_inches(16, 10)

    #plt.legend()
    ax.set_xlabel('Fault Occurrence')
    ax.set_ylabel('Fault Address')
    psize = dirname(args.csv).split("_")[1]
    figname = splitext(basename(
        args.csv))[0].split("_")[0] + "-" + psize + "-utlb.png"
    
    ylabels = map(lambda t: '0x%013X' % int(t), ax.get_yticks())
    ax.set_yticklabels(ylabels)
    plt.tight_layout()

    print('saving figure:', figname)
    fig.savefig(figname, dpi=800)
    plt.close(fig)
Example #8
def parse_bsize(t):
    bsize, lis = t
    x = []
    y = []
    for csv in humansorted(lis):
        e = Experiment(csv)
        size = None
        total_dups = None
        if "pf" in csv:
            size = basename(dirname(csv)).split("_")[2]
        else:
            size = basename(dirname(csv)).split("_")[1]
        x.append(size)
        y.append(len(e.batches))
    return (x, y, bsize)
Example #9
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', type=str, help='Full path to CSV file')
    parser.add_argument('-o', type=str, default="", help='output filename')
    args = parser.parse_args()
    c = args.csv
    m = "*"

    if ".txt" not in args.csv:
        print("Suspicious input file with no/wrong extension:", args.csv)
        exit(1)

    e = Experiment(c)

    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000
    fig = plt.figure()
    #fig = plt.gcf()
    #ax = fig.add_subplot(1,1,1)
    #ax2 = ax.twiny()

    #print(batches)

    plt.clf()

    ax = plt.gca()

    x = []
    y = []
    for i, batch in enumerate(batches):
        # Record which uTLBs produced at least one fault in this batch.
        utlbs_in_batch = set()
        for fault in batch:
            if fault.utlb_id not in utlbs_in_batch:
                x.append(i)
                utlbs_in_batch.add(fault.utlb_id)
        y.extend(utlbs_in_batch)

    plt.plot(x, y, "bo", markersize=1)

    print("max tlb:", max(y))
    print("unique tlb", len(set(y)))

    plt.xlabel("Batch ID")
    plt.ylabel("uTLB ID (fault present in batch)")

    #plt.legend()

    psize = basename(dirname(args.csv))  #.split("_")[-1]
    print("psize:", psize)

    figname = None
    if args.o == "":
        figname = (splitext(basename(args.csv))[0] + "-" + psize +
                   "-utlb-active.png").replace("_", "-")
    else:
        figname = args.o
        if ".png" not in figname:
            figname += ".png"

    #figname = splitext(basename(args.csv))[0].split("_")[0] + "-" + psize +  "-sm-time-dist.png"

    plt.tight_layout()
    print('saving figure:', figname)
    fig.savefig(figname, dpi=800)
    plt.close(fig)
Example #10
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', type=str, help='Full path to CSV file')
    parser.add_argument('-o', type=str, default="", help='output filename')
    args = parser.parse_args()
    c = args.csv
    m = "*"

    if ".txt" not in args.csv:
        print("Suspicious input file with no/wrong extension:", args.csv)
        exit(1)

    e = Experiment(c)

    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000
    fig = plt.figure()
    #fig = plt.gcf()
    #ax = fig.add_subplot(1,1,1)
    #ax2 = ax.twiny()

    # Sanity check: flag any access types other than read, write, or prefetch.
    for f in faults:
        if f.access_type not in ("r", "w", "p"):
            print("unaccounted for access type:", f.access_type)

    true_bl = [len(batch) for batch in batches]
    bl = [0] + true_bl
    for i, b in enumerate(bl):
        if i == 0:
            continue
        bl[i] += bl[i - 1]
    del bl[0]

    #print(batches)

    sm_ids = sorted(e.count_utlb_client_pairs().keys())
    cmap = plt.get_cmap('jet')
    colors = cmap(np.linspace(0, 1.0, len(sm_ids)))

    id_f_map = {sm_id: [] for sm_id in sm_ids}
    for fault in faults:
        mapping = (fault.utlb_id, fault.client_id)
        id_f_map[mapping].append(fault)

    times_between_faults = []
    for sm_id in id_f_map.keys():
        # Sort each SM's faults by timestamp once, then take deltas between
        # consecutive faults from the same (utlb_id, client_id) pair.
        sm_faults = sorted(id_f_map[sm_id], key=lambda f: f.timestamp)
        for first, second in zip(sm_faults[:-1], sm_faults[1:]):
            times_between_faults.append(second.timestamp - first.timestamp)
    avg = np.mean(times_between_faults)
    print("avg time between faults:", avg)
    print("min time between faults", min(times_between_faults))
    print("max time between faults", max(times_between_faults))

    print("total # deltas:", len(times_between_faults))
    print("Num faults < 1e1:",
          len([t for t in times_between_faults if t < 1e1]))
    print("Num faults < 1e2:",
          len([t for t in times_between_faults if t < 1e2]))
    print("Num faults < 1e3:",
          len([t for t in times_between_faults if t < 1e3]))
    print("Num faults < 1e4:",
          len([t for t in times_between_faults if t < 1e4]))
    print("Num faults < 1e5:",
          len([t for t in times_between_faults if t < 1e5]))
    print("Num faults < 1e6:",
          len([t for t in times_between_faults if t < 1e6]))
    print("Num faults < 1e7:",
          len([t for t in times_between_faults if t < 1e7]))
    print("Num faults < 1e8:",
          len([t for t in times_between_faults if t < 1e8]))

    Q1 = np.quantile(times_between_faults, 0.25)
    Q3 = np.quantile(times_between_faults, 0.75)
    med = statistics.median(times_between_faults)

    print("Q1, median, Q3:", Q1, ",", med, ",", Q3)

    #plt.plot(times, counts, marker="*")
    hist, bins, _ = plt.hist(times_between_faults, bins=16)

    print("bins:", bins)
    logbins = np.logspace(0.0, np.log10(bins[-1]), len(bins))
    print("logbins:", logbins)

    plt.clf()

    hist, bins, _ = plt.hist(times_between_faults, bins=logbins)

    ax = plt.gca()
    plt.vlines([Q1, med, Q3],
               0,
               1,
               transform=ax.get_xaxis_transform(),
               label="Q1/Med/Q3")

    plt.xlim(xmin=1e0)
    plt.ylim(ymin=0.0)

    plt.xscale("log")

    plt.xlabel("Time Between Same-SM Fault Arrival in Buffer (NS)")
    plt.ylabel("Frequency")

    plt.legend()

    psize = basename(dirname(args.csv))  #.split("_")[-1]
    print("psize:", psize)

    figname = None
    if args.o == "":
        figname = (splitext(basename(args.csv))[0] + "-" + psize +
                   "-sm-time-dist.png").replace("_", "-")
    else:
        figname = args.o
        if ".png" not in figname:
            figname += ".png"

    #figname = splitext(basename(args.csv))[0].split("_")[0] + "-" + psize +  "-sm-time-dist.png"

    plt.tight_layout()
    print('saving figure:', figname)
    fig.savefig(figname, dpi=500)
    plt.close(fig)
Example #11
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', type=str, help='Full path to CSV file')
    args = parser.parse_args()
    c = args.csv
    m = "*"

    if ".txt" not in args.csv:
        print("Suspicious input file with no/wrong extension:", args.csv)
        exit(1)

    e = Experiment(c)

    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000
    fig = plt.figure()
    #fig = plt.gcf()
    ax = fig.add_subplot(1, 1, 1)
    #ax2 = ax.twiny()

    read = []
    rx = []
    write = []
    wx = []
    prefetch = []
    px = []
    itot = 0
    discard = []
    dx = []
    for batch, dbatch in zip(batches, e.dbatches):
        batch = sorted(batch + dbatch, key=lambda f: f.timestamp)
        for i, f in enumerate(batch):
            if f.access_type == "r":
                read.append(f.fault_address)
                rx.append(i + itot)
            elif f.access_type == "w":
                write.append(f.fault_address)
                wx.append(i + itot)
            elif f.access_type == "p":
                prefetch.append(f.fault_address)
                px.append(i + itot)
            elif f.access_type == "d":
                discard.append(f.fault_address)
                dx.append(i + itot)
            else:
                print("unaccounted for access type:", faults[i].access_type)

        itot += len(batch)


    #for i,f in enumerate(sorted(faults, key=lambda f: f.timestamp)):
    #    if f.access_type == "r":
    #        read.append(f.fault_address)
    #        rx.append(i + itot)
    #    elif f.access_type == "w":
    #        write.append(f.fault_address)
    #        wx.append(i + itot)
    #    else:
    #        print("unaccounted for access type:", faults.access_type)

    base_addr = min(ranges)
    print("base_addr:", base_addr)
    #base_addr = min(faults, key=lambda f: f.fault_address).fault_address
    read = [(f - base_addr) // 4096 for f in read]
    write = [(f - base_addr) // 4096 for f in write]
    prefetch = [(f - base_addr) // 4096 for f in prefetch]
    discard = [(f - base_addr) // 4096 for f in discard]
    ranges = [(r - base_addr) // 4096 for r in ranges]
    print(read)
    print(write)

    bl = [0] + [len(batch) for batch in batches]
    for i, b in enumerate(bl):
        if i == 0:
            continue
        bl[i] += bl[i - 1]
    del bl[0]
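    # bl now holds the cumulative fault count at the end of each batch, i.e.
    # the x positions of the batch boundaries in fault order (equivalently:
    # list(itertools.accumulate(len(b) for b in batches))).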

    #print(batches)

    print("batch lengths:", [len(b) for b in batches])
    print("batch indices:", bl)

    #print(ranges)
    ylim1 = min(read + write + prefetch + discard)
    ylim2 = max(read + write + prefetch + discard)
    ax.vlines(bl, min(ranges), ylim2, label="batch", color="k", linewidth=1)
    ax.hlines(ranges,
              0,
              len(faults),
              label="malloc",
              linestyles="dotted",
              color="k",
              linewidth=1)
    #ax.hlines(ranges, 0, len(faults), label="cudaMallocManaged()", linestyles="dotted", color="k", linewidth=1)
    ax.plot(rx, read, 'b' + m, markersize=2, label="read")
    ax.plot(wx, write, 'r' + m, markersize=2, label="write")
    if len(px) > 0:
        ax.plot(px, prefetch, 'g' + m, markersize=2, label="prefetch")
    if len(dx) > 0:
        ax.plot(dx, discard, 'm' + m, markersize=2, label="discard")

    #newfaults = [f.fault_address for f in sorted(faults, key=lambda f: f.timestamp)]
    #newx = [f.timestamp for f in sorted(faults, key=lambda f: f.timestamp)]
    #newx = [int(f - min(newx)) for f in newx]
    #ax2.plot(newx, newfaults, 'g'+m, markersize=1, label="time_order")

    #print("newx", newx)

    #fig.set_size_inches(16, 10)

    ax.set_xlabel('Fault')
    ax.set_ylabel('Page Index')
    psize = dirname(args.csv).split("_")[1]
    figname = splitext(basename(
        args.csv))[0].split("_")[0] + "-" + psize + "-batch-time.png"

    #ylabels = map(lambda t: '0x%013X' % int(t), ax.get_yticks())
    #ax.set_yticklabels(ylabels)

    ax.legend(loc="upper left")
    #h1, l1 = ax.get_legend_handles_labels()
    #h2, l2 = ax2.get_legend_handles_labels()
    #ax.legend(h1+h2, l1+l2, loc="upper left")

    #ax2.set_xlabel("Time (NS)");

    plt.tight_layout()
    print('saving figure:', figname)
    fig.savefig(figname, dpi=500)
    plt.close(fig)
Example #12
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', type=str, help='Full path to CSV file')
    args = parser.parse_args()
    c = args.csv
    m = "*"

    if ".txt" not in args.csv:
        print("Suspicious input file with no/wrong extension:", args.csv)
        exit(1)
    
    e = Experiment(c)
    
    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000
    fig = plt.figure()
    #fig = plt.gcf()
    ax = fig.add_subplot(1, 1, 1)
    ax2 = ax.twiny()
    
    read = []
    rx = []
    write = []
    wx = []
    for i, f in enumerate(faults):
        if f.access_type == "r":
            read.append(f.fault_address)
            rx.append(i)
        elif f.access_type == "w":
            write.append(f.fault_address)
            wx.append(i)
        else:
            print("unaccounted for access type:", f.access_type)


    bl = [0] + [len(batch) for batch in batches]
    for i, b in enumerate(bl):
        if i == 0:
            continue
        bl[i] += bl[i - 1]
    del bl[0]

    #print(batches)
    print("batch lengths:", bl)
    
    sm_ids = sorted(e.count_utlb_client_pairs().keys())
    cmap = plt.get_cmap('jet')
    colors = cmap(np.linspace(0, 1.0, len(sm_ids)))
    
    id_f_map = {sm_id:[] for sm_id in sm_ids}
    id_fx_map = {sm_id:[] for sm_id in sm_ids}
    x = 1
    for fault in faults:
        mapping = (fault.utlb_id, fault.client_id)
        id_f_map[mapping].append(fault.fault_address)
        id_fx_map[mapping].append(x)
        x += 1


    #bl = bl[0:20]  # optionally truncate to the first few batches
    cap = bl[-1]

    print("plotting vlines")
    ylim2 = max(itertools.chain.from_iterable(id_f_map.values()))
    ax.vlines(bl, min(ranges), ylim2, label="batch", linewidth=.1)
    print("plotting hlines")
    #ax.hlines(ranges, 0, len(faults), label="cudaMallocManaged()", linestyles="dotted")
    ax.hlines(ranges, 0, cap, label="cudaMallocManaged()", linestyles="dotted")
    
    total_xs = []
    for i, (sm_id, color) in enumerate(zip(sm_ids, colors)):
        print("plotting sm", sm_id)
        #ax.scatter(id_fx_map[sm_id], id_f_map[sm_id] , color=color, label=str(sm_id), s=.01)
        x = id_fx_map[sm_id]
        y = id_f_map[sm_id]
        x = [f for f in x if f < cap]
        y = y[0:len(x)]
        total_xs += x

        print("faults len:", len(x))
        
        #if (sm_id == (46,36)):
            #print("x, y for weird SM_ID:", x, ",", y)
            #continue
        ax.plot(x, y, color=color, label=str(sm_id), linestyle="None", markersize=1, marker="*")
        #ax.plot(id_fx_map[sm_id], id_f_map[sm_id] , color=color, label=str(sm_id), linestyle="None", markersize=1)
        #if i == 0:
        #    break



    #STARTO = 1606348807401491424
    STARTO = 1606352856887328736
    ENDO = 1606352856889447392

    
    print ("sorting times lol")
    sorted_faults = sorted(faults, key=lambda f: f.timestamp)
    print ('building time lists')
    newfaults = [f.fault_address for f in sorted_faults]
    newx = [f.timestamp for f in sorted_faults]
    #newx = [int(f - STARTO) for f in newx]
    newx = [int(f - newx[0]) for f in newx]
        
    print("plotting times")
    ax2.plot(newx[0: max(total_xs)], newfaults[0:max(total_xs)], 'g'+m, markersize=1, label="time_order")
    #ax2.plot([0, ENDO - STARTO], [min(newfaults), min(newfaults)], 'b+', markersize=5, linestyle='None', label="kernel-ret")

    temp = newx[0: max(total_xs)]
    temp2 = []
    for i, f in enumerate(temp):
        if i == 0:
            temp2.append(f)
        else:
            if f > temp[i-1] + 1e5:
                print ("temporal batch:", len(temp2))
                temp2 = [f]
            else:
                temp2.append(f)
    print ("temporal batch:", len(temp2))
    
    for batch in batches:
    #for batch in batches[0:20]:
        print ("real batchlen:", len(batch))
    print ("total batches:", len(batches))

    #print([ENDO - STARTO])
    
    #print("newx", newx)

    #fig.set_size_inches(16, 10)

    ax.set_xlabel('Fault Occurrence')
    ax.set_ylabel('Fault Address')
    psize = dirname(args.csv).split("_")[1]
    figname = splitext(basename(
        args.csv))[0].split("_")[0] + "-" + psize + "-tlb-batch.png"
    
    ylabels = map(lambda t: '0x%013X' % int(t), ax.get_yticks())
    ax.set_yticklabels(ylabels)
    
    h1, l1 = ax.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax.legend(h1+h2, l1+l2, loc="upper left")
    
    ax2.set_xlabel("Time (NS)")

    plt.tight_layout()
    print('saving figure:', figname)
    fig.savefig(figname, dpi=500)
    plt.close(fig)
Example #13
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', type=str, help='Full path to CSV file')
    parser.add_argument('-o', type=str, default="", help='output filename')
    parser.add_argument('-d', type=str, default="", help='out dir')
    args = parser.parse_args()
    c = args.csv
    m = "*"

    if ".txt" not in args.csv:
        print("Suspicious input file with no/wrong extension:", args.csv)
        exit(1)

    e = Experiment(c)

    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000
    #fig = plt.gcf()
    #ax = fig.add_subplot(1,1,1)
    #ax2 = ax.twiny()

    # Sanity check: flag any access types other than read, write, or prefetch.
    for f in faults:
        if f.access_type not in ("r", "w", "p"):
            print("unaccounted for access type:", f.access_type)

    true_bl = [len(batch) for batch in batches]
    bl = [0] + true_bl
    for i, b in enumerate(bl):
        if i == 0:
            continue
        bl[i] += bl[i - 1]
    del bl[0]

    #print(batches)

    sm_ids = sorted(e.count_utlb_client_pairs().keys())
    print("sm_ids:", sm_ids)
    cmap = plt.get_cmap('jet')
    colors = cmap(np.linspace(0, 1.0, len(sm_ids)))

    id_f_map = {sm_id: [] for sm_id in sm_ids}
    for fault in faults:
        mapping = (fault.utlb_id, fault.client_id)
        id_f_map[mapping].append(fault)

    times_between_batches = e.batch_times
    avg = e.avg_time_batches()
    print("avg time between batches:", avg)
    print("min time between batches", min(times_between_batches))
    print("max time between batches", max(times_between_batches))

    print("total # deltas:", len(times_between_batches))
    print("Num batches < 1e1:",
          len([t for t in times_between_batches if t < 1e1]))
    print("Num batches < 1e2:",
          len([t for t in times_between_batches if t < 1e2]))
    print("Num batches < 1e3:",
          len([t for t in times_between_batches if t < 1e3]))
    print("Num batches < 1e4:",
          len([t for t in times_between_batches if t < 1e4]))
    print("Num batches < 1e5:",
          len([t for t in times_between_batches if t < 1e5]))
    print("Num batches < 1e6:",
          len([t for t in times_between_batches if t < 1e6]))
    print("Num batches < 1e7:",
          len([t for t in times_between_batches if t < 1e7]))
    print("Num batches < 1e8:",
          len([t for t in times_between_batches if t < 1e8]))

    Q1 = np.quantile(times_between_batches, 0.25)
    Q3 = np.quantile(times_between_batches, 0.75)
    med = statistics.median(times_between_batches)
    avg = np.mean(times_between_batches)

    print("Q1, median, Q3:", Q1, ",", med, ",", Q3)

    #plt.plot(times, counts, marker="*")
    hist, bins, _ = plt.hist(times_between_batches)
    #hist, bins, _ = plt.hist(times_between_batches, bins=16)
    binlen = len(bins)

    print("bins:", bins)
    logbins = np.logspace(0.0, np.log10(bins[-1]), len(bins))
    print("logbins:", logbins)

    plt.clf()

    hist, bins, _ = plt.hist(times_between_batches, bins=logbins)

    ax = plt.gca()
    plt.vlines([Q1, med, Q3],
               0,
               1,
               transform=ax.get_xaxis_transform(),
               label="Q1/Med/Q3")
    plt.vlines([avg],
               0,
               1,
               transform=ax.get_xaxis_transform(),
               label="Avg",
               color="r")

    plt.xlim(xmin=1e0)
    plt.ylim(ymin=0.0)

    plt.xscale("log")

    plt.xlabel("Time Between Batch Fault Arrival in Buffer (NS)")
    plt.ylabel("Frequency")

    plt.legend()

    psize = basename(dirname(args.csv))  #.split("_")[-1]
    print("psize:", psize)

    figname = None
    if args.o == "":
        figname = (args.d + "/" + splitext(basename(args.csv))[0] + "-" +
                   psize + "-batch-time-dist.png").replace("_", "-")
    else:
        figname = args.o
        if ".png" not in figname:
            figname += ".png"

    #figname = splitext(basename(args.csv))[0].split("_")[0] + "-" + psize +  "-sm-time-dist.png"

    plt.tight_layout()
    print('saving figure:', figname)
    plt.savefig(figname, dpi=500)

    plt.close()

    if ("pf") in args.csv:
        xs = [
            len(batch) - dup + len(pfbatch) for batch, dup, pfbatch in zip(
                e.batches, e.get_duplicate_faults_64k(), e.pfbatches)
        ]
    else:
        xs = [
            len(batch) - dup
            for batch, dup in zip(e.batches, e.get_duplicate_faults_4k())
        ]
        #xs = [len(batch) - e.get_duplicate_faults_4k() for batch in e.batches]

    hist, bins, _ = plt.hist(xs, bins=len(logbins))
    logbins = np.logspace(0.0, np.log10(bins[-1]), len(bins))
    plt.clf()
    hist, bins, _ = plt.hist(xs, bins=logbins)

    #plt.hist([len(batch) for batch in e.batches])
    plt.xlabel("Batch Sizes")
    plt.ylabel("Frequency")

    plt.xlim(xmin=1e0)
    plt.ylim(ymin=0.0)
    plt.xscale("log")

    if args.o == "":
        figname = (args.d + "/" + splitext(basename(args.csv))[0] + "-" +
                   psize + "-batch-size-dist.png").replace("_", "-")
    else:
        figname = args.o
        if ".png" not in figname:
            figname += ".png"

    plt.tight_layout()
    print('saving figure:', figname)
    plt.savefig(figname, dpi=500)

    plt.close()

    ys = times_between_batches

    size_time_plot(xs, ys, psize, args, "pf-dups")

    if ("pf") in args.csv:
        xs = [
            len(batch) - dup
            for batch, dup in zip(e.batches, e.get_duplicate_faults_64k())
        ]
    else:
        xs = [
            len(batch) - dup
            for batch, dup in zip(e.batches, e.get_duplicate_faults_4k())
        ]
        #xs = [len(batch) - e.get_duplicate_faults_4k() for batch in e.batches]
    size_time_plot(xs, ys, psize, args, "dups")

    if ("pf") in args.csv:
        xs = [len(batch) for batch in e.batches]
    else:
        xs = [len(batch) for batch in e.batches]

    size_time_plot(xs, ys, psize, args, "")

    div = 65536 if "pf" in args.csv else 4096
    xs = []
    for batch in batches:
        vals = sorted(set(f.fault_address // div for f in batch))
        # Count contiguous runs of page indices: a new transfer starts at the
        # first page, at any gap in the indices, or when a 2MB vablock
        # boundary is crossed.
        transfers = 0
        prev = None
        for val in vals:
            if prev is None or val > prev + 1 or val % (2097152 // div) == 0:
                transfers += 1
            prev = val

        xs.append(transfers)

    size_time_plot(xs, ys, psize, args, "transfers")
Example #14
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('csv', type=str, help='Full path to CSV file')
    args = parser.parse_args()
    c = args.csv
    m = "*"

    if ".txt" not in args.csv:
        print("Suspicious input file with no/wrong extension:", args.csv)
        exit(1)

    e = Experiment(c)

    ranges, faults, batches, num_batches = e.get_raw_data()
    e.print_info()

    matplotlib.rcParams['agg.path.chunksize'] = 10000
    fig = plt.figure()
    #fig = plt.gcf()
    ax = fig.add_subplot(1, 1, 1)
    ax2 = ax.twiny()

    read = []
    rx = []
    write = []
    wx = []
    for i, f in enumerate(faults):
        if f.access_type == "r":
            read.append(f.fault_address)
            rx.append(i)
        elif f.access_type == "w":
            write.append(f.fault_address)
            wx.append(i)
        else:
            print("unaccounted for access type:", faults.access_type)

    bl = [0] + [len(batch) for batch in batches]
    for i, b in enumerate(bl):
        if i == 0:
            continue
        bl[i] += bl[i - 1]
    del bl[0]

    #print(batches)
    print("batch lengths:", bl)

    #print(ranges)
    ylim1 = min(min(read), min(write))
    ylim2 = max(max(read), max(write))
    ax.vlines(bl, min(ranges), ylim2, label="batch")
    ax.hlines(ranges, 0, len(faults), label="cudaMallocManaged()")
    ax.plot(rx, read, 'b' + m, markersize=1, label="read")
    ax.plot(wx, write, 'r' + m, markersize=1, label="write")

    newfaults = [
        f.fault_address for f in sorted(faults, key=lambda f: f.timestamp)
    ]
    newx = [f.timestamp for f in sorted(faults, key=lambda f: f.timestamp)]
    newx = [int(f - min(newx)) for f in newx]
    ax2.plot(newx, newfaults, 'g' + m, markersize=1, label="time_order")

    print("newx", newx)

    #fig.set_size_inches(16, 10)

    ax.set_xlabel('Fault Occurrence')
    ax.set_ylabel('Fault Address')
    psize = dirname(args.csv).split("_")[1]
    figname = splitext(basename(
        args.csv))[0].split("_")[0] + "-" + psize + "-batch.png"

    ylabels = map(lambda t: '0x%013X' % int(t), ax.get_yticks())
    ax.set_yticklabels(ylabels)

    h1, l1 = ax.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax.legend(h1 + h2, l1 + l2, loc="upper left")

    ax2.set_xlabel("Time (NS)")

    plt.tight_layout()
    print('saving figure:', figname)
    fig.savefig(figname, dpi=500)
    plt.close(fig)
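
All of the main() drivers above share the same command-line shape: a single positional path to a fault-trace .txt file whose directory name encodes the page size. A hypothetical invocation (script and path names are illustrative only):

# python plot_batch.py /data/cublas_4096/cublas_0.txt
#   -> prints the experiment summary via Experiment.print_info() and writes,
#      e.g., cublas-4096-batch.png to the current working directory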