def plot6a4():
    """Plot the pooled zero-percentage histogram for the test6 results.

    For every part directory (``a`` to ``e``) and every ``fc1`` to ``fc5``
    sub-directory of ``res/test6``, each row of ``zeros/zt`` is summed and
    divided by the last column of the corresponding ``lz4`` row.  All of
    these ratios are pooled into one relative-frequency histogram, their
    standard deviation and mean are printed, and the figure is saved as
    ``rep/rep6a/fig4``.
    """
    colors = misc.palette(4)
    fig, ax = plt.subplots(1, figsize=(9, 4), sharey=True)
    plt.subplots_adjust(top=0.9, bottom=0.18, left=0.1, right=0.93)

    allpercs = []
    for prefix in ['a', 'b', 'c', 'd', 'e']:
        for fc in range(1, 6):
            ztots = misc.extract_file(
                "res/test6/{}/fc{}/zeros/zt".format(prefix, fc))
            ztots = [sum(x) for x in ztots]
            sizes = misc.extract_file(
                "res/test6/{}/fc{}/lz4".format(prefix, fc))
            sizes = [x[-1] for x in sizes]
            allpercs.extend(np.true_divide(ztots, sizes))

    # Equal weights of 1/N turn the bar heights into relative frequencies.
    ax.hist(allpercs,
            weights=np.zeros_like(allpercs) + 1. / len(allpercs),
            color=colors[1],
            bins=[0.05 * x for x in range(3, 16)])

    print("6a - 4")
    print("std: {}".format(round(np.std(allpercs), 3)))
    print("av: {}".format(round(np.mean(allpercs), 3)))

    ax.set_ylabel("Relative Frequency")
    ax.set_xlabel("Zero Percentage")
    # The title has no placeholders, so the former .format() call was a no-op.
    plt.suptitle("Figure 4: Zero Percentage Frequency Distribution (Report 5)")
    misc.savefig("rep/rep6a/fig4", plt)
    # Release the figure, as the other plot6a* functions do.
    plt.close()
def plot6a3():
    """Plot the zero percentage per row for each part of the test6 results.

    One subplot is drawn per part directory (``a`` to ``e``).  Within a
    subplot, one line is drawn per ``fc1`` to ``fc5`` sub-directory: each row
    sum of ``zeros/zt`` divided by the last column of the matching ``lz4``
    row, plotted against the row index.  The figure is saved as
    ``rep/rep6a/fig3``.
    """
    colors = misc.palette(7)
    fig, axs = plt.subplots(5, figsize=(9, 8), sharey=True, sharex=True)
    plt.subplots_adjust(top=0.9, bottom=0.08, left=0.1, right=0.93,
                        hspace=0.4)

    for j, prefix in enumerate(['a', 'b', 'c', 'd', 'e']):
        for i in range(5):
            ztots = misc.extract_file(
                "res/test6/{}/fc{}/zeros/zt".format(prefix, i + 1))
            ztots = [sum(x) for x in ztots]
            sizes = misc.extract_file(
                "res/test6/{}/fc{}/lz4".format(prefix, i + 1))
            sizes = [x[-1] for x in sizes]
            zpercs = np.true_divide(ztots, sizes)
            axs[j].plot(range(len(zpercs)), zpercs, '-', color=colors[i])
        axs[j].set_title("Part {}".format(j + 1))

    # The title has no placeholders, so the former .format() call was a no-op.
    plt.suptitle("Figure 3: Zero Percentage vs Field Size (Report 5)")
    axs[2].set_ylabel("Zero Percentage")
    axs[-1].set_xlabel("Field Size")

    # Legend entries follow plotting order, i.e. fc1..fc5 on the first axes.
    leg = axs[0].legend(
        ["fields per rec: {}".format(x + 1) for x in range(5)],
        loc='upper right')
    for line in leg.get_lines():
        line.set_linewidth(3.0)

    misc.savefig("rep/rep6a/fig3", plt)
    plt.close()
def plot_one_dist(ztype, title, *, file_name=None):
    """Draw a bar chart of one row of a test5 zero-distribution file.

    Row index 10 of ``res/test5/rec1500000/zeros/<ztype>`` is read and its
    first 110 values are drawn as bars at x positions 1 to 110.

    Args:
        ztype: File name inside the ``zeros`` directory.
        title: Text placed at the top of the figure.
        file_name: When given, the figure is saved under ``res2/ctest5/``
            with this name; otherwise it is shown interactively.
    """
    plt.figure(figsize=(10, 8))
    record_count = 1500000
    rows = misc.extract_file(
        "res/test5/rec{}/zeros/{}".format(record_count, ztype))
    selected = rows[10]

    # Same x positions as range(250)[1:][:110]: 1, 2, ..., 110.
    plt.bar(range(1, 111), selected[:110])
    plt.xticks(list(range(0, 101, 10)))

    plt.figtext(0.5, 0.04, "Sequence Length", ha="center", fontsize=12)
    plt.figtext(0.08, 0.5, "Frequency", va="center", fontsize=12,
                rotation="vertical")
    plt.figtext(0.5, 0.9, "{}".format(title), ha="center", va="bottom",
                fontsize=16)

    if file_name is not None:
        misc.savefig("res2/ctest5/" + file_name, plt)
    else:
        plt.show()
    plt.clf()
def plot_cdiff(ctype, title, *, file_name=None):
    """Plot, per record count, how each row differs from the final row.

    For record counts 500000 to 5000000 (step 500000) the file
    ``res/test5/rec<count>/<ctype>`` is read.  For every row except the
    last, the row (without its final column) and the last row (without its
    final column) are both divided by the current row's final value, and
    ``av(diff(...))`` of the two is plotted against the row index.

    Args:
        ctype: File name inside each ``rec<count>`` directory.
        title: Text placed at the top of the figure.
        file_name: When given, the figure is saved under ``res2/ctest5/``
            with this name; otherwise it is shown interactively.
    """
    shades = [[k / 10, 0, k / 20] for k in range(10)]
    for idx, shade in enumerate(shades):
        record_count = (idx + 1) * 500000
        rows = misc.extract_file(
            "res/test5/rec{}/{}".format(record_count, ctype))
        # NOTE(review): the reference row rows[-1] is scaled by the *current*
        # row's last value, not its own -- confirm this is intended.
        deltas = [
            av(diff(div(row[:-1], row[-1]), div(rows[-1][:-1], row[-1])))
            for row in rows[:-1]
        ]
        plt.plot(range(len(deltas)), deltas, '-o', color=shade)

    plt.figtext(0.5, 0.03, "Segment #", ha="center", fontsize=12)
    plt.figtext(0.5, 0.9, "{}".format(title), ha="center", va="bottom",
                fontsize=16)

    if file_name is not None:
        misc.savefig("res2/ctest5/" + file_name, plt)
    else:
        plt.show()
    plt.clf()
def plot_diff(ztype, title, *, file_name=None):
    """Plot, per record count, the change between consecutive rows.

    For record counts 500000 to 5000000 (step 500000) the file
    ``res/test5/rec<count>/zeros/<ztype>`` is read, and for every pair of
    adjacent rows ``sqsum(diff_norm(next_row, row))`` is plotted against the
    index of the earlier row.

    Args:
        ztype: File name inside each ``zeros`` directory.
        title: Text placed at the top of the figure.
        file_name: When given, the figure is saved under ``res2/ctest5/``
            with this name; otherwise it is shown interactively.
    """
    shades = [[k / 10, 0, k / 20] for k in range(10)]
    plt.figure(figsize=(10, 8))

    for idx, shade in enumerate(shades):
        record_count = (idx + 1) * 500000
        rows = misc.extract_file(
            "res/test5/rec{}/zeros/{}".format(record_count, ztype))
        steps = [
            sqsum(diff_norm(later, earlier))
            for earlier, later in zip(rows, rows[1:])
        ]
        plt.plot(range(len(steps)), steps, '-o', color=shade)

    plt.figtext(0.5, 0.01, "Segment #", ha="center", fontsize=12)
    plt.figtext(0.5, 0.9, "{}".format(title), ha="center", va="bottom",
                fontsize=16)

    if file_name is not None:
        misc.savefig("res2/ctest5/" + file_name, plt)
    else:
        plt.show()
    plt.clf()
def plot6c3():
    """Plot the zero distribution of the large test8 workloads.

    Rows of ``res/test8/zeros/zt`` are summed element-wise, but only those
    whose matching ``res/test8/lz4`` row has a last column greater than
    13000000000.  The sum is passed through ``sum_norm`` and ``merge_bins``,
    drawn with ``bar1234`` and saved as ``rep/rep6c/fig3``.
    """
    zero_rows = misc.extract_file("res/test8/zeros/zt")
    lz4_rows = misc.extract_file("res/test8/lz4")
    last_columns = [row[-1] for row in lz4_rows]

    accumulated = np.zeros(len(zero_rows[0]))
    for zeros, size in zip(zero_rows, last_columns):
        if size > 13000000000:
            accumulated = accumulated + zeros

    averaged = sum_norm(accumulated)
    binned = merge_bins(averaged)
    axes = bar1234(averaged, binned)
    axes.set_title(
        "Figure 3: Relative Frequency of Zero Bytes (14.9GB Workloads)")
    misc.savefig("rep/rep6c/fig3", plt)
    plt.close()
def extract_sizes(prefix):
    """Collect the last column of every ``lz4`` file under one test6 part.

    NOTE(review): a second ``extract_sizes`` with a different signature is
    defined later in this file and replaces this one at import time.

    Args:
        prefix: Part directory name under ``res/test6``.

    Returns:
        A list with one 500-element float array per sub-directory (sorted by
        path, ``status`` excluded).  Element ``j`` holds the last value of
        row ``j`` of that directory's ``lz4`` file; unused slots stay zero.
    """
    run_dirs = sorted(
        path for path in glob.glob("res/test6/{}/*".format(prefix))
        if path.split('/')[-1] != 'status')

    sizes_list = []
    for run_dir in run_dirs:
        rows = misc.extract_file("{}/lz4".format(run_dir))
        sizes = np.zeros(500)
        for idx, row in enumerate(rows):
            sizes[idx] = row[-1]
        sizes_list.append(sizes)
    return sizes_list
def plot6b5():
    """Plot the zero distribution over the last 500 rows of the test7 data.

    The final 500 rows of ``res/test7/zeros/zt`` (or all rows, if there are
    fewer) are summed element-wise, passed through ``sum_norm`` and
    ``merge_bins``, drawn with ``bar1234`` and saved as ``rep/rep6b/fig5``.
    """
    data = misc.extract_file("res/test7/zeros/zt")
    data_sum = np.zeros(len(data[0]))
    # Slice rather than index with data[-i]: for i == 0 that expression is
    # data[0], which pulled the first row in and left out the 500th-from-last.
    for row in data[-500:]:
        data_sum = np.add(data_sum, row)

    data_av = sum_norm(data_sum)
    bdata_av = merge_bins(data_av)
    ax = bar1234(data_av, bdata_av)
    ax.set_title("Figure 5: Relative Frequency of Zero Bytes (High Memory)")
    misc.savefig("rep/rep6b/fig5", plt)
    plt.close()
def plot7():
    """Draw the first 500 rows of one test6 file as a grid of small plots.

    Rows of ``res/test6/a/fc5/zeros/zt`` are laid out on a 50 x 10 grid,
    filling each column top to bottom before moving to the next.  Each cell
    plots the row without its last value, with both axes hidden.  The figure
    is saved as ``res2/ctest6/res7``.
    """
    rows = misc.extract_file("res/test6/a/fc5/zeros/zt")
    fig, axs = plt.subplots(50, 10, figsize=(15, 30))
    plt.subplots_adjust(top=0.99, bottom=0.01, left=0.01, right=0.99)

    for index in range(500):
        # Row `index` goes to grid column index // 50, grid row index % 50.
        col, row = divmod(index, 50)
        series = rows[index][:-1]
        cell = axs[row][col]
        cell.plot(range(len(series)), series)
        cell.get_xaxis().set_visible(False)
        cell.get_yaxis().set_visible(False)

    misc.savefig("res2/ctest6/res7", plt)
    plt.close()
def plot6a1():
    """Plot the zero distribution summed over all test5 record counts.

    Every row of ``res/test5/rec<count>/zeros/zt`` for record counts 500000
    to 5000000 (step 500000) is added into a 249-element accumulator.  The
    accumulator is divided by its own sum, binned with ``merge_bins``, drawn
    with ``bar1234`` and saved as ``rep/rep6a/fig1``.
    """
    total = np.zeros(249, dtype=np.float64)
    for step in range(1, 11):
        record_count = step * 500000
        rows = misc.extract_file(
            "res/test5/rec{}/zeros/zt".format(record_count))
        for row in rows:
            total = total + row

    distribution = np.true_divide(total, sum(total))
    binned = merge_bins(distribution)
    axes = bar1234(distribution, binned)
    axes.set_title("Figure 1: Zero Distribution (Report 4 Global Average)")
    misc.savefig("rep/rep6a/fig1", plt)
    plt.close()
def plot_dists(ztype, ysize, title, *, file_name=None):
    """Draw a grid of normalised rows from the test5 zero-distribution files.

    The grid has ``ysize`` rows and 10 columns.  Column ``i`` uses
    ``res/test5/rec<(i + 1) * 500000>/zeros/<ztype>``, and data row ``j`` is
    drawn in grid row ``ysize - 1 - j``, so data row 0 is at the bottom.
    Only the left-most column keeps a y label (``j + 1``) and only the bottom
    row keeps an x label (the record count); all other axes are hidden.

    Args:
        ztype: File name inside each ``zeros`` directory.
        ysize: Number of data rows to draw per file.
        title: Text placed at the top of the figure.
        file_name: When given, the figure is saved under ``res2/ctest5/``
            with this name; otherwise it is shown interactively.
    """
    # squeeze=False keeps `axs` two-dimensional even when ysize == 1.
    fig, axs = plt.subplots(ysize, 10, sharey=True,
                            figsize=(15, ysize * 3 / 4), squeeze=False)

    for i in range(10):
        recct = (i + 1) * 500000
        data = misc.extract_file("res/test5/rec{}/zeros/{}".format(
            recct, ztype))
        for j in range(ysize):
            d = norm(data[j])
            axy = ysize - 1 - j
            ax = axs[axy][i]
            ax.plot(d)

            # Grid position is known from the indices, so this avoids
            # Axes.is_first_col()/is_last_row(), which newer Matplotlib
            # releases no longer provide.
            if i == 0:
                ax.set_ylabel(j + 1)
                ax.set_yticklabels([])
            else:
                ax.get_yaxis().set_visible(False)

            if axy == ysize - 1:
                ax.set_xlabel(str(recct))
                ax.set_xticklabels([])
            else:
                ax.get_xaxis().set_visible(False)

    plt.figtext(0.5, -0.007 + 0.0024 * ysize, "# of Records", ha="center",
                fontsize=18)
    plt.figtext(0.08, 0.5, "Segment #", va="center", fontsize=18,
                rotation="vertical")
    plt.figtext(0.5, 0.9, "{}".format(title), ha="center", va="bottom",
                fontsize=24)

    if file_name is None:
        plt.show()
    else:
        misc.savefig("res2/ctest5/" + file_name, plt)
    plt.clf()
def extract_comps(ctype):
    """Average a per-row ratio from every test6 ``lz4`` file.

    For each part directory (``a`` to ``e``) and each of its sub-directories
    (sorted by path, ``status`` excluded), a 500-element array is filled with
    ``row[-1] / sum(row[:-1]) * 5`` for each row of the ``lz4`` file; unused
    slots stay zero.  The arrays are then averaged element-wise.

    NOTE(review): ``ctype`` is never used -- the ``lz4`` file is always
    read.  Confirm whether the file name was meant to depend on it.

    Args:
        ctype: Currently ignored.

    Returns:
        A 500-element array: the element-wise mean over all directories.
    """
    collected = []
    for prefix in ['a', 'b', 'c', 'd', 'e']:
        run_dirs = sorted(
            path for path in glob.glob("res/test6/{}/*".format(prefix))
            if path.split('/')[-1] != 'status')
        for run_dir in run_dirs:
            rows = misc.extract_file("{}/lz4".format(run_dir))
            ratios = np.zeros(500)
            for idx, row in enumerate(rows):
                ratios[idx] = row[-1] / sum(row[:-1]) * 5
            collected.append(ratios)

    summed = np.sum(collected, axis=0)
    return np.true_divide(summed, len(collected))
def plot6a2():
    """Plot the zero distribution averaged over all test6 result files.

    Every row of ``res/test6/<part>/fc<n>/zeros/zt`` (parts ``a`` to ``e``,
    ``n`` from 1 to 5) is added into a 119-element accumulator, which is
    divided by the number of rows and then by its own sum.  130 zeros are
    inserted before the final element, giving 249 values, and the result is
    binned with ``merge_bins``, drawn with ``bar1234`` and saved as
    ``rep/rep6a/fig2``.
    """
    total = np.zeros(119, dtype=np.float64)
    row_count = 0
    for prefix in 'abcde':
        for fc in range(1, 6):
            rows = misc.extract_file(
                'res/test6/{}/fc{}/zeros/zt'.format(prefix, fc))
            for row in rows:
                total = total + row
                row_count += 1

    mean = np.true_divide(total, row_count)
    normalised = np.true_divide(mean, sum(mean)).tolist()

    # Keep the final value last, with 130 empty slots inserted before it.
    head, tail = normalised[:-1], normalised[-1]
    padded = head + [0] * 130 + [tail]

    binned = merge_bins(padded)
    axes = bar1234(padded, binned)
    axes.set_title("Figure 2: Zero Distribution (Report 5 Global Average)")
    misc.savefig("rep/rep6a/fig2", plt)
    plt.close()
def extract_ztotals(*, low=0, high=251, subtest=7):
    """Sum a column slice of every row of a test's ``zeros/zt`` file.

    Args:
        low: Start index of the slice (inclusive).
        high: End index of the slice (exclusive).
        subtest: Test number used to build ``res/test<subtest>/zeros/zt``.

    Returns:
        A numpy array with one entry per row: ``sum(row[low:high])``.
    """
    rows = misc.extract_file("res/test{}/zeros/zt".format(subtest))
    return np.array([sum(row[low:high]) for row in rows])
def extract_sizes(*, subtest=7):
    """Return the last column of every row of a test's ``lz4`` file.

    NOTE(review): an earlier ``extract_sizes(prefix)`` in this file has the
    same name; this later definition is the one that remains bound.

    Args:
        subtest: Test number used to build ``res/test<subtest>/lz4``.

    Returns:
        A numpy array with one entry per row: the row's final value.
    """
    rows = misc.extract_file("res/test{}/lz4".format(subtest))
    return np.array([row[-1] for row in rows])