def plot_floprate():
    """Compare CPU VecDot/VecAXPY flop rates of PETSc v3.11.3 and v3.12.

    For every vector size the rate is read from the matching log file with
    ``ut.get_floprate`` and converted to ``float``.  The four curves are drawn
    on log-log axes, the figure is written to
    ``../plots/CPU_311_vs_312_floprate.png`` and then displayed.
    """
    vector_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000,
                    1000000000]
    old_prefix = "../data/petsc-v3.11.3/vec_ops.n2_g0_c21_p42.petsc_v3.11.3."
    new_prefix = "../data/petsc-v3.12/vec_ops.n2_g0_c21_p42.petsc_v3.12."

    dot_old, dot_new, axpy_old, axpy_new = [], [], [], []
    # (target list, log prefix, operation, count), listed in the order the
    # logs are queried for each vector size.
    readers = (
        (dot_old, old_prefix, "VecDot", 1),
        (dot_new, new_prefix, "VecDot", 1),
        (axpy_old, old_prefix, "VecAXPY", 3),
        (axpy_new, new_prefix, "VecAXPY", 3),
    )
    for n in vector_sizes:
        for series, prefix, op_name, op_count in readers:
            series.append(
                float(ut.get_floprate(prefix + str(n), op_name, True,
                                      op_count)))

    # One entry per curve; solid lines rely on matplotlib's default style.
    curves = (
        (dot_old, {"color": "red", "label": "VecDot v3.11.3"}),
        (dot_new, {"color": "red", "linestyle": "dashed",
                   "label": "VecDot v3.12"}),
        (axpy_old, {"color": "black", "label": "VecAXPY v3.11.3"}),
        (axpy_new, {"color": "black", "linestyle": "dashed",
                    "label": "VecAXPY v3.12"}),
    )
    for rates, style in curves:
        plt.plot(vector_sizes, rates, **style)

    plt.title("CPU floprate", fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    plt.xscale('log')
    plt.yscale('log')
    plt.tight_layout()
    plt.savefig("../plots/CPU_311_vs_312_floprate.png")
    plt.show()
def plot_floprate():
    """Compare CPU VecDot/VecAXPY flop rates of the 09/19 and 12/19 runs.

    For every vector size the rate is read from the matching log file with
    ``ut.get_floprate`` and converted to ``float``.  The four curves are drawn
    on log-log axes, the figure is written to
    ``../plots/CPU_sept_vs_dec_floprate.png`` and then displayed.
    """
    vector_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000,
                    1000000000]
    # (directory-specific prefix, run-id suffix) for each measurement series.
    september = ("../data/vec-ops/vec_ops.n2_g0_c21_p42.", ".654910")
    december = ("../data/vec-ops-december/vec_ops.n2_g0_c21_p42.", ".795805")

    dot_sept, dot_dec, axpy_sept, axpy_dec = [], [], [], []
    # (target list, log location, operation, count), listed in the order the
    # logs are queried for each vector size.
    readers = (
        (dot_sept, september, "VecDot", 1),
        (dot_dec, december, "VecDot", 1),
        (axpy_sept, september, "VecAXPY", 3),
        (axpy_dec, december, "VecAXPY", 3),
    )
    for n in vector_sizes:
        for series, (prefix, run_id), op_name, op_count in readers:
            series.append(
                float(ut.get_floprate(prefix + str(n) + run_id, op_name, True,
                                      op_count)))

    # One entry per curve; solid lines rely on matplotlib's default style.
    curves = (
        (dot_sept, {"color": "red", "label": "VecDot 09/19"}),
        (dot_dec, {"color": "red", "linestyle": "dashed",
                   "label": "VecDot 12/19"}),
        (axpy_sept, {"color": "black", "label": "VecAXPY 09/19"}),
        (axpy_dec, {"color": "black", "linestyle": "dashed",
                    "label": "VecAXPY 12/19"}),
    )
    for rates, style in curves:
        plt.plot(vector_sizes, rates, **style)

    plt.title("CPU floprate 09/19-12/19", fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    plt.xscale('log')
    plt.yscale('log')
    plt.tight_layout()
    plt.savefig("../plots/CPU_sept_vs_dec_floprate.png")
    plt.show()
def gpu_flops(operation, count, show):
    """Plot the flop rate of ``operation`` against the number of GPUs.

    One curve is drawn per vector size (10^5 through 10^9), each with one
    point per GPU count from 1 to 6.  The figure is saved to
    ``../plots/GPU_<operation>_flops.png``.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``; selects the data directory
            and run id of the log files.
        count: forwarded unchanged to ``ut.get_floprate``.
        show: when true, display the figure after saving it.

    Raises:
        ValueError: if ``operation`` is not one of the supported names.
    """
    # Log location per operation: (path prefix, run-id suffix).
    runs = {
        "VecDot": ("../data/waitforgpu/vec_ops.n", ".718552"),
        "VecAXPY": ("../data/vec-ops/vec_ops.n", ".654909"),
    }
    if operation not in runs:
        # Previously this fell through with empty series and only failed
        # later inside ax.plot with a shape-mismatch error.
        raise ValueError(
            "gpu_flops: unsupported operation %r "
            "(expected 'VecDot' or 'VecAXPY')" % (operation,))
    prefix, run_id = runs[operation]

    gpus = list(range(1, 7))
    sizes = [100000, 1000000, 10000000, 100000000, 1000000000]
    sizes_str = ["$10^5$", "$10^6$", "$10^7$", "$10^8$", "$10^9$"]

    # data[i][j] is the rate for sizes[i] measured on gpus[j] GPUs.
    data = [
        [
            float(
                ut.get_floprate(
                    prefix + str(gpu) + "_g1_c2_a1." + str(size) + run_id,
                    operation, False, count))
            for gpu in gpus
        ]
        for size in sizes
    ]

    # plot
    num = len(sizes)
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces the deprecated/removed Axes.set_color_cycle.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])
    for rates, size_label in zip(data, sizes_str):
        ax.plot(gpus, rates, marker="o", label="Vec size " + size_label)

    plt.title(operation + " GPU performance", fontsize=12)
    plt.xlabel("Number of GPUs", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper left", fontsize=12, frameon=False)
    plt.xlim([0, 7])
    # Render the y tick values with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])
    plt.tight_layout()
    plt.savefig("../plots/GPU_" + operation + "_flops.png")
    if show:
        plt.show()
def cpu_flops(operation, count, show):
    """Plot the flop rate of ``operation`` against the number of CPU cores.

    One marker series is drawn per vector size (10^4, 10^6, 10^8), each with
    one point per rank count from 1 to 42, read from the
    ``../data/cpu-flush-cache`` logs.  The figure is saved to
    ``../plots/CPU_<operation>_flops.png``.

    Args:
        operation: name of the logged operation, forwarded to
            ``ut.get_floprate`` and used in the title and output file name.
        count: forwarded unchanged to ``ut.get_floprate``.
        show: when true, display the figure after saving it.
    """
    ranks = list(range(1, 43))
    vecsize = [10000, 1000000, 100000000]
    vecsize_str = ["$10^4$", "$10^6$", "$10^8$"]

    # NOTE: an earlier data set for the same sweep lives under
    # ../data/vec-ops with run id ".654910" appended to the file name.
    prefix = "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p"

    # data[i][j] is the rate for vecsize[i] measured on ranks[j] cores.
    data = [
        [
            float(
                ut.get_floprate(prefix + str(rank) + "." + str(size),
                                operation, True, count))
            for rank in ranks
        ]
        for size in vecsize
    ]

    # plot
    num = len(vecsize)
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces the deprecated/removed Axes.set_color_cycle.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])
    for rates, size_label in zip(data, vecsize_str):
        ax.plot(ranks, rates, marker="o", linestyle="none",
                label="Vec size " + size_label)

    plt.title(operation + " CPU performance", fontsize=12)
    plt.xlabel("Number of CPU cores", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper left", fontsize=12, frameon=False)
    plt.tight_layout()
    plt.xlim([-3, 43])
    # Render the y tick values with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])
    plt.savefig("../plots/CPU_" + operation + "_flops.png")
    if show:
        plt.show()
def cpu_vs_gpu(operation, count, show):
    """Plot 42-core CPU against 6-GPU flop rates of ``operation`` by size.

    Both curves are drawn on log-log axes and the figure is saved to
    ``../plots/<operation>_CPU_vs_GPU_siampp.png``.  A hidden twin y axis is
    created with its upper limit scaled by a per-operation factor.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``; selects the GPU log files
            and the scale of the hidden right-hand axis.
        count: forwarded unchanged to ``ut.get_floprate``.
        show: when true, display the figure after saving it.

    Raises:
        ValueError: if ``operation`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    # Per operation: (GPU log prefix, run-id suffix, right-axis scale).
    gpu_runs = {
        "VecDot": ("../data/waitforgpu/vec_ops.n6_g1_c2_a1.", ".718559", 1),
        "VecAXPY": ("../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1.",
                    ".668627", 1.5),
    }
    if operation not in gpu_runs:
        raise ValueError(
            "cpu_vs_gpu: unsupported operation %r "
            "(expected 'VecDot' or 'VecAXPY')" % (operation,))
    gpu_prefix, gpu_run, mem_scale = gpu_runs[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000,
        1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000,
                 1000000000]

    gpu = [
        float(
            ut.get_floprate(gpu_prefix + str(size) + gpu_run, operation,
                            False, count))
        for size in gpu_sizes
    ]
    cpu = [
        float(
            ut.get_floprate(
                "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size),
                operation, True, count))
        for size in cpu_sizes
    ]

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    left.plot(cpu_sizes, cpu, color=cm((1. * 2) / 4),
              label="42 CPU cores " + operation)
    left.plot(gpu_sizes, gpu, color=cm((1. * 1 - 1) / 4),
              label="6 GPUs " + operation)

    plt.xlim([500, 2000000000])
    left.set_title("GPU vs CPU " + operation + " performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    left.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    # The twin axis carries no data in this variant, so it is hidden.
    right.get_yaxis().set_visible(False)
    left.set_ylim(top=10000000)
    right.set_ylim(top=10000000 * mem_scale)
    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()
    plt.savefig("../plots/" + operation + "_CPU_vs_GPU_siampp.png")
    if show:
        plt.show()
def cpu_vs_gpu(operation, count, show):
    """Plot CPU/GPU flop rates of ``operation`` together with copy bandwidths.

    The left axis shows the flop rates of 42 CPU cores and 6 GPUs.  The right
    axis shows VecCopy rates on CPU and GPU, the VecCUDACopyTo rate from the
    pinned-memory runs, and three marker ticks for the peak rates computed
    from the hard-coded bandwidth constants below.  The figure is only
    displayed; the ``savefig`` call is intentionally left disabled.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``; selects the GPU log files
            and the scale of the right-hand axis.
        count: forwarded unchanged to ``ut.get_floprate``.
        show: when true, display the figure.

    Raises:
        ValueError: if ``operation`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    # Per operation: (GPU log prefix, run-id suffix, right-axis scale).
    gpu_runs = {
        "VecDot": ("../data/waitforgpu/vec_ops.n6_g1_c2_a1.", ".718559", 1),
        "VecAXPY": ("../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1.",
                    ".668627", 1.5),
    }
    if operation not in gpu_runs:
        raise ValueError(
            "cpu_vs_gpu: unsupported operation %r "
            "(expected 'VecDot' or 'VecAXPY')" % (operation,))
    gpu_prefix, gpu_run, mem_scale = gpu_runs[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000,
        1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000,
                 1000000000]
    gpu = []
    cpu = []
    gpu_VecCopy = []
    cpu_VecCopy = []
    gpu_ToGpu = []

    for size in gpu_sizes:
        gpu.append(
            float(
                ut.get_floprate(gpu_prefix + str(size) + gpu_run, operation,
                                False, count)))

        # VecCopy performs two memory accesses, hence the factor of two.
        elapsed = ut.get_time(
            "../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1." + str(size) +
            ".668627", "VecCopy", 1)
        gpu_VecCopy.append(2 * ut.calc_rate(size, elapsed))

        # Pinned-memory runs: small and large sizes come from different
        # jobs.  The original compared the whole ``gpu_sizes`` list with the
        # threshold (always False on Python 2, TypeError on Python 3); the
        # per-iteration ``size`` is what has to be tested.
        run_num = ".732319" if size <= 100000 else ".715071"
        elapsed = ut.get_time(
            "../data/pinned/vec_ops.n6_g1_c7_a1." + str(size) + run_num,
            "VecCUDACopyTo", 1)
        gpu_ToGpu.append(ut.calc_rate(size, elapsed))

    # NOTE: alternative CPU data sets exist under ../data/vec-ops (run id
    # ".654910") and ../data/cpu-no-flush-cache (run id ".767590").
    for size in cpu_sizes:
        cpu_log = "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size)
        cpu.append(float(ut.get_floprate(cpu_log, operation, True, count)))
        # VecCopy performs two memory accesses, hence the factor of two.
        elapsed = ut.get_time(cpu_log, "VecCopy", 1)
        cpu_VecCopy.append(2 * ut.calc_rate(size, elapsed))

    # Peak rates, expressed in the right axis unit (8 MBytes/second).
    cpu_rate = 135 * 1e9
    gpu_rate = 900 * 1e9
    cpu_peak = (2 * cpu_rate) / (8 * 1e6)
    gpu_peak = (6 * gpu_rate) / (8 * 1e6)
    cpu_to_gpu_rate = 50 * 1e9
    cpu_to_gpu_peak = (6 * cpu_to_gpu_rate) / (8 * 1e6)

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    cpu_color = cm((1. * 2) / 4)
    gpu_color = cm((1. * 1 - 1) / 4)
    transfer_color = cm((1. * 3) / 4)

    left.plot(cpu_sizes, cpu, color=cpu_color,
              label="42 CPU cores " + operation)
    right.plot(cpu_sizes, cpu_VecCopy, color=cpu_color, linestyle="dashed",
               label="42 CPU cores VecCopy")
    right.plot(gpu_sizes, gpu_ToGpu, color=transfer_color,
               linestyle="dashed", label="6 GPUs copy to GPU")
    left.plot(gpu_sizes, gpu, color=gpu_color, label="6 GPUs " + operation)
    right.plot(gpu_sizes, gpu_VecCopy, color=gpu_color, linestyle="dashed",
               label="6 GPUs VecCopy")

    # Peak markers sit on the right edge of the plot (x equals the upper x
    # limit) and are drawn unclipped so they remain visible there.
    peak_markers = (
        (gpu_peak, gpu_color, "GPU copy peak"),
        (cpu_to_gpu_peak, transfer_color, "CPU to GPU peak"),
        (cpu_peak, cpu_color, "CPU copy peak"),
    )
    for peak, color, label in peak_markers:
        plt.plot(2000000000, peak, color=color, linestyle="none",
                 markersize="15", markeredgewidth=2, marker="_",
                 label=label, clip_on=False)

    plt.xlim([500, 2000000000])
    left.set_title("GPU vs CPU " + operation + " performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    plt.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    left.set_ylim(top=10000000)
    right.set_ylim(top=10000000 * mem_scale)
    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()
    # plt.savefig("../plots/" + operation + "_CPU_vs_GPU.png")
    if show:
        plt.show()
def synthetic_latency(operation, count, show):
    """Plot measured flop rates next to a rate with a fixed latency removed.

    Three curves are drawn on log-log axes: the measured 42-core CPU rate,
    the measured 6-GPU rate, and a synthetic GPU rate computed as
    ``2 * size * 1e-6 / (time - 24e-6)`` from the logged execution time.
    The figure is saved to ``../plots/<operation>_synthetic_latency.png``
    and the current figure is cleared afterwards.

    Args:
        operation: ``"VecAXPY"`` or ``"VecDot"``.
        count: forwarded unchanged to ``ut.get_floprate`` and ``ut.get_time``.
        show: when true, display the figure after saving it.

    Raises:
        ValueError: if ``operation`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    if operation not in ("VecAXPY", "VecDot"):
        raise ValueError(
            "synthetic_latency: unsupported operation %r "
            "(expected 'VecAXPY' or 'VecDot')" % (operation,))

    sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]
    # Latency (seconds) subtracted from the measured time.  Earlier
    # experiments also derived curves for 16e-6 and for 28e-6 (VecAXPY) /
    # 50e-6 (VecDot); only the 24e-6 curve was ever plotted, so the unused
    # series and the VecCopy / VecCUDACopyTo reads that fed nothing have
    # been removed.
    latency = 24e-6

    gpu = []
    cpu = []
    gpu_24 = []
    for size in sizes:
        gpu_log = ("../data/vec-ops/vec_ops.n6_g1_c7_a1." + str(size) +
                   ".654914")
        cpu_log = ("../data/vec-ops/vec_ops.n2_g0_c21_p42." + str(size) +
                   ".654910")

        # measured GPU rate and execution time
        gpu.append(float(ut.get_floprate(gpu_log, operation, False, count)))
        elapsed = ut.get_time(gpu_log, operation, count)
        # synthetic rate with the latency taken out of the measured time
        gpu_24.append((2 * size * 1e-6) / (elapsed - latency))

        # measured CPU rate
        cpu.append(float(ut.get_floprate(cpu_log, operation, True, count)))

    # plot
    plt.plot(sizes, cpu, color="grey", alpha=0.5, marker=".",
             markersize="6", markeredgewidth=2,
             label="42 CPUs " + operation)
    plt.plot(sizes, gpu, color="black", marker=".", markersize="6",
             markeredgewidth=2, label="6 GPUs " + operation)
    # "\\cdot" keeps the backslash for mathtext without relying on the
    # invalid "\c" escape sequence.
    plt.plot(sizes, gpu_24, color="black", marker=".", markersize="6",
             markeredgewidth=2, linestyle="dotted",
             label="$24\\cdot10^{-6}$ latency")

    plt.title(operation + " performance without calculated latency",
              fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="lower right", fontsize=12, frameon=False)
    plt.xscale('log')
    plt.yscale('log')
    plt.ylim(top=1000000)
    plt.tight_layout()
    plt.savefig("../plots/" + operation + "_synthetic_latency.png")
    if show:
        plt.show()
    # Reset the implicit pyplot figure so a following call starts clean.
    plt.gcf().clear()
def jed_cpu_vs_gpu(operation, count, show):
    """Plot CPU/GPU rates of ``operation`` against execution time.

    Every curve uses the logged execution time on the x axis and a rate on
    the y axis: the flop rate of ``operation`` on 42 CPU cores and on 6 GPUs,
    the VecCopy rate on both, and the VecCUDACopyTo rate from the
    pinned-memory runs.  Copy rates are divided by a per-operation factor
    (1 for VecDot, 1.5 for VecAXPY).  The figure is saved to
    ``../plots/jed_<operation>_CPU_vs_GPU.png``.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``.
        count: forwarded unchanged to ``ut.get_floprate`` and ``ut.get_time``.
        show: when true, display the figure after saving it.

    Raises:
        ValueError: if ``operation`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    # Per operation: (GPU log prefix, run-id suffix, copy-rate divisor).
    gpu_runs = {
        "VecDot": ("../data/waitforgpu/vec_ops.n6_g1_c2_a1.", ".718559", 1),
        "VecAXPY": ("../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1.",
                    ".668627", 1.5),
    }
    if operation not in gpu_runs:
        raise ValueError(
            "jed_cpu_vs_gpu: unsupported operation %r "
            "(expected 'VecDot' or 'VecAXPY')" % (operation,))
    gpu_prefix, gpu_run, mem_scale = gpu_runs[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000,
        1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000,
                 1000000000]
    gpu = []
    cpu = []
    gpu_time = []
    cpu_time = []
    gpu_VecCopy = []
    cpu_VecCopy = []
    gpu_VecCopy_time = []
    cpu_VecCopy_time = []
    gpu_ToGpu = []
    gpu_ToGpu_time = []

    for size in gpu_sizes:
        # operation time and floprate
        gpu_log = gpu_prefix + str(size) + gpu_run
        gpu_time.append(ut.get_time(gpu_log, operation, count))
        gpu.append(float(ut.get_floprate(gpu_log, operation, False, count)))

        # GPU copy time and bandwidth
        scale = 2 / mem_scale
        elapsed = ut.get_time(
            "../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1." + str(size) +
            ".668627", "VecCopy", 1)
        gpu_VecCopy_time.append(elapsed)
        gpu_VecCopy.append(scale * ut.calc_rate(size, elapsed))

        # VecCUDACopyTo time and bandwidth from the pinned-memory runs.
        # Small and large sizes come from different jobs.  The original
        # compared the whole ``gpu_sizes`` list with the threshold (always
        # False on Python 2, TypeError on Python 3); the per-iteration
        # ``size`` is what has to be tested.
        scale = 1 / mem_scale
        run_num = ".732319" if size <= 100000 else ".715071"
        elapsed = ut.get_time(
            "../data/pinned/vec_ops.n6_g1_c7_a1." + str(size) + run_num,
            "VecCUDACopyTo", 1)
        gpu_ToGpu_time.append(elapsed)
        gpu_ToGpu.append(scale * ut.calc_rate(size, elapsed))

    # NOTE: an alternative CPU data set exists under ../data/vec-ops with
    # run id ".654910" appended to the file name.
    for size in cpu_sizes:
        cpu_log = "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size)

        # CPU operation time and floprate
        cpu_time.append(ut.get_time(cpu_log, operation, count))
        cpu.append(float(ut.get_floprate(cpu_log, operation, True, count)))

        # CPU copy time and bandwidth
        scale = 2 / mem_scale
        elapsed = ut.get_time(cpu_log, "VecCopy", 1)
        cpu_VecCopy_time.append(elapsed)
        cpu_VecCopy.append(scale * ut.calc_rate(size, elapsed))

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    cpu_color = cm((1. * 2) / 4)
    gpu_color = cm((1. * 1 - 1) / 4)
    transfer_color = cm((1. * 3) / 4)

    left.plot(cpu_time, cpu, color=cpu_color,
              label="42 CPU cores " + operation)
    left.plot(cpu_VecCopy_time, cpu_VecCopy, color=cpu_color,
              linestyle="dashed", label="42 CPU cores VecCopy")
    left.plot(gpu_ToGpu_time, gpu_ToGpu, color=transfer_color,
              linestyle="dashed", label="6 GPUs copy to GPU")
    left.plot(gpu_time, gpu, color=gpu_color, label="6 GPUs " + operation)
    left.plot(gpu_VecCopy_time, gpu_VecCopy, color=gpu_color,
              linestyle="dashed", label="6 GPUs VecCopy")

    left.set_title("GPU vs CPU " + operation + " performance", fontsize=12)
    left.set_xlabel("Execution time (seconds)", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    left.set_ylim([7, 1000000])
    right.set_ylim([7, 1000000 * mem_scale])
    plt.xlim([1e-6, .2])
    # Lay out before saving so the written file matches what is shown; the
    # original called tight_layout() only after savefig().
    plt.tight_layout()
    plt.savefig("../plots/jed_" + operation + "_CPU_vs_GPU.png")
    if show:
        plt.show()
def virtualization_plot(operation, count, ylims, show):
    """Plot the flop rate of ``operation`` on one GPU against MPI rank count.

    One marker series is drawn per vector size (10^4 through 10^7), each with
    one point per rank count from 1 to 8.  The figure is saved to
    ``../plots/<operation>_virtualization_1_GPU.png`` and the current figure
    is cleared afterwards.

    Args:
        operation: name of the logged operation, forwarded to
            ``ut.get_floprate`` and used in the title and output file name.
        count: forwarded unchanged to ``ut.get_floprate``.
        ylims: y-axis limits, passed directly to ``plt.ylim``.
        show: when true, display the figure after saving it.
    """
    ranks = list(range(1, 9))
    # (vector size, legend label).  The 10^3 series that used to be loaded
    # was never plotted, so its file reads have been dropped.
    series = (
        (10000, "$10^4$"),
        (100000, "$10^5$"),
        (1000000, "$10^6$"),
        (10000000, "$10^7$"),
    )

    rates_by_size = [
        [
            float(
                ut.get_floprate(
                    "../data/vec-ops/vec_ops.n1_g1_c42_a" + str(rank) + "." +
                    str(size) + ".654911", operation, False, count))
            for rank in ranks
        ]
        for size, _ in series
    ]

    # plot
    num = 4
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces the deprecated/removed Axes.set_color_cycle.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])
    for rates, (_, size_label) in zip(rates_by_size, series):
        ax.plot(ranks, rates, marker="o", markersize="6", markeredgewidth=2,
                linestyle="none", label=size_label)

    plt.title(operation + " virtualization performance on 1 GPU",
              fontsize=12)
    plt.xlabel("MPI ranks", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper right", fontsize=12, ncol=4, frameon=False)
    plt.xlim([0.5, 8.5])
    plt.ylim(ylims)
    plt.tight_layout()
    # Render the y tick values with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])
    plt.savefig("../plots/" + operation + "_virtualization_1_GPU.png")
    if show:
        plt.show()
    # Reset the current figure so a following call starts clean.
    plt.gcf().clear()
def cpu_vs_gpu(operation, count, clear, show):
    """Plot 42-core CPU flop rates of ``operation`` under several cache setups.

    Four curves are drawn on log-log axes: cleared cache, half cleared cache,
    the December rerun, and uncleared cache.  Further series (permuted
    operations, flush with VecSet, VecCopy rates) are still loaded but only
    used by the disabled plot calls kept as comments below.  The first value
    of the December series is printed.  The figure is only displayed; the
    ``savefig`` call is intentionally left disabled.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``.
        count: forwarded unchanged to ``ut.get_floprate``.
        clear: unused; kept so existing callers keep working.
        show: when true, display the figure.

    Raises:
        ValueError: if ``operation`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    mem_scales = {"VecDot": 1, "VecAXPY": 1.5}
    if operation not in mem_scales:
        raise ValueError(
            "cpu_vs_gpu: unsupported operation %r "
            "(expected 'VecDot' or 'VecAXPY')" % (operation,))
    mem_scale = mem_scales[operation]

    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000,
                 1000000000]
    cpu_flush = []
    cpu_half_flush = []
    cpu_no_flush = []
    cpu_permute = []
    cpu_flush_vecset = []
    cpu_half_flush_december = []
    cpu_VecCopy_flush = []
    cpu_VecCopy_half_flush = []
    cpu_VecCopy_no_flush = []

    # (target list, log prefix, run-id suffix), in the order they are read.
    rate_sources = (
        (cpu_flush, "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42.",
         ".767597"),
        (cpu_half_flush, "../data/vec-ops/vec_ops.n2_g0_c21_p42.",
         ".654910"),
        (cpu_half_flush_december,
         "../data/vec-ops-december/vec_ops.n2_g0_c21_p42.", ".795805"),
        (cpu_no_flush, "../data/cpu-no-flush-cache/vec_ops.n2_g0_c21_p42.",
         ".767590"),
        (cpu_flush_vecset,
         "../data/cpu-flush-cache-vecset/vec_ops.n2_g0_c21_p42.", ".792547"),
        (cpu_permute, "../data/permute-operations/vec_ops.n2_g0_c21_p42.",
         ".792549"),
    )
    copy_sources = (
        (cpu_VecCopy_flush, "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42.",
         ".767597"),
        (cpu_VecCopy_half_flush, "../data/vec-ops/vec_ops.n2_g0_c21_p42.",
         ".654910"),
        (cpu_VecCopy_no_flush,
         "../data/cpu-no-flush-cache/vec_ops.n2_g0_c21_p42.", ".767590"),
    )

    copy_scale = 2 / mem_scale
    for size in cpu_sizes:
        for target, prefix, run_id in rate_sources:
            target.append(
                float(
                    ut.get_floprate(prefix + str(size) + run_id, operation,
                                    True, count)))
        # VecCopy
        for target, prefix, run_id in copy_sources:
            elapsed = ut.get_time(prefix + str(size) + run_id, "VecCopy", 1)
            target.append(copy_scale * ut.calc_rate(size, elapsed))

    # Function-call form works on both Python 2 and Python 3; the original
    # print statement is a syntax error on Python 3.
    print(cpu_half_flush_december[0])

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    left.plot(cpu_sizes, cpu_flush, color=cm((1. * 2) / 4),
              label=operation + " cleared cache")
    left.plot(cpu_sizes, cpu_half_flush, color=cm((1. * 2) / 4),
              linestyle="dashed", label=operation + " half cleared cache")
    left.plot(cpu_sizes, cpu_half_flush_december, color="black",
              label=operation + " December")
    left.plot(cpu_sizes, cpu_no_flush, color=cm((1. * 2) / 4),
              linestyle="dotted", label=operation + " uncleared cache")
    # Disabled comparison curves:
    # left.plot(cpu_sizes, cpu_permute, color="black", linestyle="dashed", label=operation+ " another cleared")
    # left.plot(cpu_sizes, cpu_permute, color="black", label=operation+ " rearrange operations")
    # left.plot(cpu_sizes, cpu_VecCopy_flush, color=cm((1.*1-1)/4), label="VecCopy cleared cache")
    # left.plot(cpu_sizes, cpu_VecCopy_half_flush, color=cm((1.*1-1)/4), linestyle="dashed", label="VecCopy half cleared cache")
    # left.plot(cpu_sizes, cpu_VecCopy_no_flush, color=cm((1.*1-1)/4), linestyle="dotted", label="VecCopy uncleared cache")

    plt.xlim([500, 2000000000])
    left.set_title("CPU " + operation + " cache performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    plt.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    top_ = 1000000
    left.set_ylim(top=top_)
    right.set_ylim(top=top_ * mem_scale)
    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()
    # plt.savefig("../plots/" + operation + "_CPU_cleared_cache.png")
    if show:
        plt.show()
def cpu_gpu_flops(operation, count, normed, ncols, show):
    """Plot flop rates of ``operation`` for several CPU-core and GPU counts.

    Curves are drawn for 7, 14, 21, 28, 35 and 42 CPU cores and for 1, 2, 3
    and 6 GPUs against the vector size (log x axis).  The figure is saved to
    ``../plots/CPU_GPU_<operation>_flops[_norm].png``.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``; selects the GPU data
            directory and run id.
        count: forwarded unchanged to ``ut.get_floprate``.
        normed: when true, divide every curve by its core/GPU count and
            append ``_norm`` to the output file name.
        ncols: number of legend columns.
        show: when true, display the figure after saving it.

    Raises:
        ValueError: if ``operation`` is not one of the supported names
            (previously this surfaced as an unbound-variable error).
    """
    # Per operation: (GPU data directory, run-id suffix).
    gpu_runs = {
        "VecDot": ("waitforgpu", ".718559"),
        "VecAXPY": ("figures-2-7-8-9", ".668627"),
    }
    if operation not in gpu_runs:
        raise ValueError(
            "cpu_gpu_flops: unsupported operation %r "
            "(expected 'VecDot' or 'VecAXPY')" % (operation,))
    path, run_num = gpu_runs[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000,
        1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000,
                 1000000000]
    gpu_counts = (1, 2, 3, 6)
    cpu_counts = (7, 14, 21, 28, 35, 42)

    gpu_rates = {n: [] for n in gpu_counts}
    for size in gpu_sizes:
        for n in gpu_counts:
            gpu_rates[n].append(
                float(
                    ut.get_floprate(
                        "../data/" + path + "/vec_ops.n" + str(n) +
                        "_g1_c2_a1." + str(size) + run_num, operation, False,
                        count)))

    # NOTE: an alternative CPU data set exists under ../data/vec-ops with
    # run id ".654910" appended to the file name.
    cpu_rates = {p: [] for p in cpu_counts}
    for size in cpu_sizes:
        for p in cpu_counts:
            cpu_rates[p].append(
                float(
                    ut.get_floprate(
                        "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p" +
                        str(p) + "." + str(size), operation, True, count)))

    if normed:
        # Per-unit rate; dividing the 1-GPU curve by 1.0 leaves it unchanged.
        for n in gpu_counts:
            gpu_rates[n] = [x / float(n) for x in gpu_rates[n]]
        for p in cpu_counts:
            cpu_rates[p] = [x / float(p) for x in cpu_rates[p]]

    # plot
    num = 10
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces the deprecated/removed Axes.set_color_cycle.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])
    for p in cpu_counts:
        ax.plot(cpu_sizes, cpu_rates[p], marker="o", markersize="4",
                markeredgewidth=2, label=str(p) + " CPU cores")
    for n in gpu_counts:
        ax.plot(gpu_sizes, gpu_rates[n], marker="o", markersize="4",
                markeredgewidth=2,
                label=str(n) + (" GPU" if n == 1 else " GPUs"))

    plt.title(operation + " performance", fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper left", fontsize=12, ncol=ncols, frameon=False)
    plt.tight_layout()
    plt.xscale('log')
    # Render the y tick values with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])
    plt.savefig("../plots/CPU_GPU_" + operation + "_flops" +
                ("_norm" if normed else "") + ".png")
    if show:
        plt.show()
def gpu_virtualization(operation, count, height, show):
    """Plot flop rates of ``operation`` against total MPI ranks per GPU setup.

    For 1, 2, 3 and 6 GPUs the rate at vector size 10^8 is read for an
    increasing number of ranks per GPU and plotted against the total number
    of MPI ranks (GPU count times ranks per GPU).  The figure is saved to
    ``../plots/<operation>_virtualization_10^8.png``.

    Args:
        operation: ``"VecDot"`` or ``"VecAXPY"``; selects the data directory
            and the run ids.
        count: forwarded unchanged to ``ut.get_floprate``.
        height: upper y-axis limit.
        show: when true, display the figure after saving it.

    Raises:
        ValueError: if ``operation`` is not one of the supported names.
    """
    # Per operation: (data directory, run-id suffix for 1, 2, 3, 6 GPUs).
    runs = {
        "VecDot": ("waitforgpu", (".718553", ".718554", ".718555",
                                  ".718556")),
        "VecAXPY": ("vec-ops", (".654911", ".654912", ".654913", ".654914")),
    }
    if operation not in runs:
        # Previously this fell through with empty series and only failed
        # later inside ax.plot with a shape-mismatch error.
        raise ValueError(
            "gpu_virtualization: unsupported operation %r "
            "(expected 'VecDot' or 'VecAXPY')" % (operation,))
    directory, run_ids = runs[operation]

    size = 100000000
    size_str = "10^8"

    # (GPU count, value of the "c" field in the file name, ranks per GPU,
    # legend label).
    # NOTE(review): the 1-GPU sweep stops at 41 ranks although its layout
    # is c42 -- confirm whether an a42 log exists before extending it.
    layouts = (
        (1, 42, list(range(1, 42)), "1 GPU"),
        (2, 21, list(range(1, 22)), "2 GPUs"),
        (3, 14, list(range(1, 15)), "3 GPUs"),
        (6, 7, list(range(1, 8)), "6 GPUs"),
    )

    curves = []
    for (n_gpus, cores, ranks_per_gpu, label), run_id in zip(layouts,
                                                            run_ids):
        rates = [
            float(
                ut.get_floprate(
                    "../data/" + directory + "/vec_ops.n" + str(n_gpus) +
                    "_g1_c" + str(cores) + "_a" + str(a) + "." + str(size) +
                    run_id, operation, False, count))
            for a in ranks_per_gpu
        ]
        total_ranks = [n_gpus * a for a in ranks_per_gpu]
        curves.append((total_ranks, rates, label))

    # plot
    num = 4
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces the deprecated/removed Axes.set_color_cycle.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])
    for total_ranks, rates, label in curves:
        ax.plot(total_ranks, rates, marker="o", markersize="6",
                markeredgewidth=2, linestyle="none", label=label)

    plt.title(operation + " virtualization performance", fontsize=12)
    plt.xlabel("MPI ranks", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper right", ncol=2, fontsize=12, frameon=False)
    plt.xlim([0, 43])
    plt.ylim(top=height)
    plt.tight_layout()
    # Render the y tick values with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])
    plt.savefig("../plots/" + operation + "_virtualization_" + size_str +
                ".png")
    if show:
        plt.show()