コード例 #1
0
def plot_floprate():
    """Compare CPU VecDot and VecAXPY flop rates of PETSc v3.11.3 and v3.12.

    For each vector size from 10^3 to 10^9 the flop rate of both events is
    read through ``ut.get_floprate`` from the log file of each PETSc version.
    The four curves are drawn on log-log axes, the figure is saved to
    ``../plots/CPU_311_vs_312_floprate.png`` and then displayed.
    """
    cpu_sizes = [10**exponent for exponent in range(3, 10)]

    # Log file names are these prefixes followed by the vector size.
    log_3_11 = "../data/petsc-v3.11.3/vec_ops.n2_g0_c21_p42.petsc_v3.11.3."
    log_3_12 = "../data/petsc-v3.12/vec_ops.n2_g0_c21_p42.petsc_v3.12."

    def read_rate(log_prefix, vec_size, event, count):
        # True and the trailing count are handed to ut.get_floprate unchanged.
        return float(
            ut.get_floprate(log_prefix + str(vec_size), event, True, count))

    dot_old, dot_new, axpy_old, axpy_new = [], [], [], []
    for vec_size in cpu_sizes:
        dot_old.append(read_rate(log_3_11, vec_size, "VecDot", 1))
        dot_new.append(read_rate(log_3_12, vec_size, "VecDot", 1))
        axpy_old.append(read_rate(log_3_11, vec_size, "VecAXPY", 3))
        axpy_new.append(read_rate(log_3_12, vec_size, "VecAXPY", 3))

    # Solid lines for v3.11.3, dashed lines for v3.12; one colour per event.
    dashed = {"linestyle": "dashed"}
    curves = [
        (dot_old, "red", {}, "VecDot v3.11.3"),
        (dot_new, "red", dashed, "VecDot v3.12"),
        (axpy_old, "black", {}, "VecAXPY v3.11.3"),
        (axpy_new, "black", dashed, "VecAXPY v3.12"),
    ]
    for rates, colour, extra, legend_text in curves:
        plt.plot(cpu_sizes, rates, color=colour, label=legend_text, **extra)

    text_size = 12
    plt.title("CPU floprate", fontsize=text_size)
    plt.xlabel("Vector size", fontsize=text_size)
    plt.ylabel("MFlops/second", fontsize=text_size)
    plt.legend(loc="lower right", fontsize=text_size, ncol=1, frameon=False)
    plt.xscale('log')
    plt.yscale('log')
    plt.tight_layout()

    plt.savefig("../plots/CPU_311_vs_312_floprate.png")
    plt.show()
コード例 #2
0
def plot_floprate():
    """Compare CPU VecDot and VecAXPY flop rates of the 09/19 and 12/19 runs.

    Flop rates for vector sizes 10^3 through 10^9 are read with
    ``ut.get_floprate`` from ``../data/vec-ops`` (run suffix ``.654910``) and
    ``../data/vec-ops-december`` (run suffix ``.795805``).  The curves are
    plotted on log-log axes, written to
    ``../plots/CPU_sept_vs_dec_floprate.png`` and then displayed.
    """
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    # File name templates; the placeholder receives the vector size.
    sept_log = "../data/vec-ops/vec_ops.n2_g0_c21_p42.{}.654910"
    dec_log = "../data/vec-ops-december/vec_ops.n2_g0_c21_p42.{}.795805"

    # (log template, event name, count argument) for each curve, listed in
    # the order the rates are read for every size.
    requests = [
        (sept_log, "VecDot", 1),
        (dec_log, "VecDot", 1),
        (sept_log, "VecAXPY", 3),
        (dec_log, "VecAXPY", 3),
    ]
    collected = [[] for _ in requests]

    for n in cpu_sizes:
        for bucket, (template, event, count) in zip(collected, requests):
            raw = ut.get_floprate(template.format(n), event, True, count)
            bucket.append(float(raw))

    vecdot_sept, vecdot_dec, vecaxpy_sept, vecaxpy_dec = collected

    # September curves are solid, December curves dashed.
    plt.plot(cpu_sizes, vecdot_sept, color="red", label="VecDot 09/19")
    plt.plot(cpu_sizes, vecdot_dec, color="red", linestyle="dashed",
             label="VecDot 12/19")
    plt.plot(cpu_sizes, vecaxpy_sept, color="black", label="VecAXPY 09/19")
    plt.plot(cpu_sizes, vecaxpy_dec, color="black", linestyle="dashed",
             label="VecAXPY 12/19")

    font = {"fontsize": 12}
    plt.title("CPU floprate 09/19-12/19", **font)
    plt.xlabel("Vector size", **font)
    plt.ylabel("MFlops/second", **font)
    plt.legend(loc="lower right", ncol=1, frameon=False, **font)
    for set_scale in (plt.xscale, plt.yscale):
        set_scale('log')
    plt.tight_layout()

    plt.savefig("../plots/CPU_sept_vs_dec_floprate.png")
    plt.show()
コード例 #3
0
def gpu_flops(operation, count, show):
    """Plot the flop rate of a vector operation on 1 to 6 GPUs.

    One curve is drawn per vector size (10^5 through 10^9).  The figure is
    saved to ``../plots/GPU_<operation>_flops.png``.

    Args:
        operation: Event name, ``"VecDot"`` or ``"VecAXPY"``.  It selects the
            log directory and run suffix and is passed to ``ut.get_floprate``.
        count: Passed to ``ut.get_floprate`` as its last argument.
        show: When true, display the figure after saving it.

    Raises:
        ValueError: If ``operation`` has no associated log files.
    """
    # Log directory and run-number suffix for each supported operation.
    log_sources = {
        "VecDot": ("../data/waitforgpu/", ".718552"),
        "VecAXPY": ("../data/vec-ops/", ".654909"),
    }
    if operation not in log_sources:
        # Previously an unknown operation produced empty series and failed
        # later inside ax.plot with an unrelated shape error.
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'"
            .format(operation))
    directory, run_num = log_sources[operation]

    gpus = range(1, 7)
    sizes = [100000, 1000000, 10000000, 100000000, 1000000000]
    sizes_str = ["$10^5$", "$10^6$", "$10^7$", "$10^8$", "$10^9$"]

    # get flop rates: one list (indexed by GPU count) per vector size
    data = [[
        float(
            ut.get_floprate(
                directory + "vec_ops.n" + str(gpu) + "_g1_c2_a1." +
                str(size) + run_num, operation, False, count))
        for gpu in gpus
    ] for size in sizes]

    # plot
    num = len(sizes)
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces set_color_cycle, which newer matplotlib
    # releases no longer provide.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])

    for size_label, flop_rates in zip(sizes_str, data):
        ax.plot(gpus, flop_rates, marker="o", label="Vec size " + size_label)

    plt.title(operation + " GPU performance", fontsize=12)
    plt.xlabel("Number of GPUs", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper left", fontsize=12, frameon=False)
    plt.xlim([0, 7])
    # Show the y tick values as integers with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])
    plt.tight_layout()

    plt.savefig("../plots/GPU_" + operation + "_flops.png")
    if show:
        plt.show()
コード例 #4
0
def cpu_flops(operation, count, show):
    """Plot the flop rate of a vector operation for 1 to 42 CPU ranks.

    One marker series is drawn per vector size (10^4, 10^6 and 10^8), read
    from the logs in ``../data/cpu-flush-cache``.  The figure is saved to
    ``../plots/CPU_<operation>_flops.png``.

    Args:
        operation: Event name passed to ``ut.get_floprate`` and used in the
            title and output file name.
        count: Passed to ``ut.get_floprate`` as its last argument.
        show: When true, display the figure after saving it.
    """
    ranks = range(1, 43)
    vecsize = [10000, 1000000, 100000000]
    vecsize_str = ["$10^4$", "$10^6$", "$10^8$"]

    # get flop rates: one list (indexed by rank count) per vector size.
    # An earlier data set lived in ../data/vec-ops with run suffix .654910.
    data = [[
        float(
            ut.get_floprate(
                "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p" + str(rank) +
                "." + str(size), operation, True, count))
        for rank in ranks
    ] for size in vecsize]

    # plot
    num = len(vecsize)
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces set_color_cycle, which newer matplotlib
    # releases no longer provide.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])

    for size_label, flop_rates in zip(vecsize_str, data):
        ax.plot(ranks,
                flop_rates,
                marker="o",
                linestyle="none",
                label="Vec size " + size_label)

    plt.title(operation + " CPU performance", fontsize=12)
    plt.xlabel("Number of CPU cores", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper left", fontsize=12, frameon=False)
    plt.tight_layout()
    plt.xlim([-3, 43])
    # Show the y tick values as integers with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])

    plt.savefig("../plots/CPU_" + operation + "_flops.png")
    if show:
        plt.show()
コード例 #5
0
def cpu_vs_gpu(operation, count, show):
    """Plot the flop rate of a vector operation on 42 CPU cores vs 6 GPUs.

    Both curves are drawn on log-log axes against the vector size and the
    figure is saved to ``../plots/<operation>_CPU_vs_GPU_siampp.png``.

    Args:
        operation: Event name, ``"VecDot"`` or ``"VecAXPY"``.  It selects the
            GPU log files and the upper limit of the hidden right-hand axis.
        count: Passed to ``ut.get_floprate`` as its last argument.
        show: When true, display the figure after saving it.

    Raises:
        ValueError: If ``operation`` is not one of the supported events.
    """
    # GPU log prefix, run-number suffix and right-axis scale per operation.
    settings = {
        "VecDot": ("../data/waitforgpu/vec_ops.n6_g1_c2_a1.", ".718559", 1),
        "VecAXPY": ("../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1.",
                    ".668627", 1.5),
    }
    if operation not in settings:
        # Previously this surfaced as an UnboundLocalError on `scale`.
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'"
            .format(operation))
    gpu_prefix, gpu_run, mem_scale = settings[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    # NOTE(review): the original carried a "need to get this data" remark on
    # the GPU reads; confirm the referenced logs are the intended ones.
    gpu = [
        float(
            ut.get_floprate(gpu_prefix + str(size) + gpu_run, operation,
                            False, count)) for size in gpu_sizes
    ]
    cpu = [
        float(
            ut.get_floprate(
                "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size),
                operation, True, count)) for size in cpu_sizes
    ]

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')

    left.plot(cpu_sizes, cpu, color=cm(0.5), label="42 CPU cores " + operation)
    left.plot(gpu_sizes, gpu, color=cm(0.0), label="6 GPUs " + operation)

    plt.xlim([500, 2000000000])
    left.set_title("GPU vs CPU " + operation + " performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    left.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    # The right-hand axis carries no curves here; it is configured like the
    # left one and then hidden.
    right.set_yscale('log')
    right.get_yaxis().set_visible(False)
    left.set_ylim(top=10000000)
    right.set_ylim(top=10000000 * mem_scale)
    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()

    plt.savefig("../plots/" + operation + "_CPU_vs_GPU_siampp.png")
    if show:
        plt.show()
コード例 #6
0
def cpu_vs_gpu(operation, count, show):
    """Plot CPU vs GPU flop rates together with copy bandwidths and peaks.

    The left axis shows the flop rate of ``operation`` on 42 CPU cores and on
    6 GPUs.  The right axis shows VecCopy rates for both, the copy-to-GPU
    rate from the pinned-memory logs, and three peak markers.  The figure is
    not written to disk (the ``savefig`` call is disabled).

    Args:
        operation: Event name, ``"VecDot"`` or ``"VecAXPY"``.  It selects the
            GPU log files and the upper limit of the right-hand axis.
        count: Passed to ``ut.get_floprate`` as its last argument.
        show: When true, display the figure.

    Raises:
        ValueError: If ``operation`` is not one of the supported events.
    """
    # GPU log prefix, run-number suffix and right-axis scale per operation.
    settings = {
        "VecDot": ("../data/waitforgpu/vec_ops.n6_g1_c2_a1.", ".718559", 1),
        "VecAXPY": ("../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1.",
                    ".668627", 1.5),
    }
    if operation not in settings:
        # Previously this surfaced as an UnboundLocalError/NameError.
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'"
            .format(operation))
    gpu_prefix, gpu_run, mem_scale = settings[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    gpu = []
    cpu = []

    gpu_VecCopy = []
    cpu_VecCopy = []

    gpu_ToGpu = []

    for size in gpu_sizes:
        gpu.append(
            float(
                ut.get_floprate(gpu_prefix + str(size) + gpu_run, operation,
                                False, count)))

        # VecCopy rate, doubled: "two memory access" per the original note.
        copy_time = ut.get_time(
            "../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1." + str(size) +
            ".668627", "VecCopy", 1)
        gpu_VecCopy.append(2 * ut.calc_rate(size, copy_time))

        # pinned memory: the run number depends on the vector size.  The
        # original compared the whole gpu_sizes list to the threshold, which
        # is always False on Python 2 and a TypeError on Python 3.
        run_num = ".732319" if size <= 100000 else ".715071"
        copy_time = ut.get_time(
            "../data/pinned/vec_ops.n6_g1_c7_a1." + str(size) + run_num,
            "VecCUDACopyTo", 1)
        gpu_ToGpu.append(ut.calc_rate(size, copy_time))

    for size in cpu_sizes:
        # Alternative CPU data sets used earlier: ../data/vec-ops (.654910)
        # and ../data/cpu-no-flush-cache (.767590).
        cpu_log = "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size)
        cpu.append(float(ut.get_floprate(cpu_log, operation, True, count)))

        # VecCopy rate, doubled: "two memory access" per the original note.
        copy_time = ut.get_time(cpu_log, "VecCopy", 1)
        cpu_VecCopy.append(2 * ut.calc_rate(size, copy_time))

    # calculate peak rates in 8 MBytes/second
    cpu_rate = 135 * 1e9
    gpu_rate = 900 * 1e9
    cpu_peak = (2 * cpu_rate) / (8 * 1e6)
    gpu_peak = (6 * gpu_rate) / (8 * 1e6)
    cpu_to_gpu_rate = 50 * 1e9
    cpu_to_gpu_peak = (6 * cpu_to_gpu_rate) / (8 * 1e6)

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    gpu_color = cm(0.0)
    cpu_color = cm(0.5)
    to_gpu_color = cm(0.75)

    left.plot(cpu_sizes, cpu, color=cpu_color,
              label="42 CPU cores " + operation)
    right.plot(cpu_sizes, cpu_VecCopy, color=cpu_color, linestyle="dashed",
               label="42 CPU cores VecCopy")
    right.plot(gpu_sizes, gpu_ToGpu, color=to_gpu_color, linestyle="dashed",
               label="6 GPUs copy to GPU")
    left.plot(gpu_sizes, gpu, color=gpu_color, label="6 GPUs " + operation)
    right.plot(gpu_sizes, gpu_VecCopy, color=gpu_color, linestyle="dashed",
               label="6 GPUs VecCopy")

    # Peak markers sit at the right edge of the x range; clip_on=False keeps
    # them drawn even though they lie on the axes border.
    peaks = [
        (gpu_peak, gpu_color, "GPU copy peak"),
        (cpu_to_gpu_peak, to_gpu_color, "CPU to GPU peak"),
        (cpu_peak, cpu_color, "CPU copy peak"),
    ]
    for peak, color, label in peaks:
        plt.plot(2000000000,
                 peak,
                 color=color,
                 linestyle="none",
                 markersize=15,
                 markeredgewidth=2,
                 marker="_",
                 label=label,
                 clip_on=False)

    plt.xlim([500, 2000000000])
    left.set_title("GPU vs CPU " + operation + " performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    plt.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    left.set_ylim(top=10000000)
    right.set_ylim(top=10000000 * mem_scale)
    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()

    # plt.savefig("../plots/" + operation + "_CPU_vs_GPU.png")
    if show:
        plt.show()
コード例 #7
0
def synthetic_latency(operation, count, show):
    """Plot GPU flop rates with a fixed latency subtracted from the run time.

    Three curves are drawn against the vector size on log-log axes: the
    measured 42-CPU rate, the measured 6-GPU rate, and a synthetic GPU rate
    computed as ``2 * size * 1e-6 / (time - 24e-6)``.  The figure is saved to
    ``../plots/<operation>_synthetic_latency.png`` and then cleared.

    Args:
        operation: Event name, ``"VecDot"`` or ``"VecAXPY"``.
        count: Passed to ``ut.get_floprate`` and ``ut.get_time`` as their
            last argument.
        show: When true, display the figure after saving it.

    Raises:
        ValueError: If ``operation`` is not one of the supported events.
    """
    if operation not in ("VecDot", "VecAXPY"):
        # Previously this surfaced as a NameError on `labels` at plot time.
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'"
            .format(operation))

    # get data
    sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    # Latency removed from the measured time, and its legend mantissa.  The
    # original also computed (but never plotted) variants for 16e-6 and for
    # 28e-6 (VecAXPY) / 50e-6 (VecDot), plus VecCopy and copy-to-GPU rates.
    latency = 24e-6
    latency_label = "24"

    gpu = []
    gpu_24 = []
    cpu = []

    for size in sizes:
        gpu_log = "../data/vec-ops/vec_ops.n6_g1_c7_a1." + str(size) + ".654914"

        # measured flop rate and run time from the GPU log
        gpu.append(float(ut.get_floprate(gpu_log, operation, False, count)))
        elapsed = ut.get_time(gpu_log, operation, count)

        # synthetic flop rate: same flop count, latency taken off the time
        gpu_24.append((2 * size * 1e-6) / (elapsed - latency))

        cpu.append(
            float(
                ut.get_floprate(
                    "../data/vec-ops/vec_ops.n2_g0_c21_p42." + str(size) +
                    ".654910", operation, True, count)))

    # plot
    plt.plot(sizes,
             cpu,
             color="grey",
             alpha=0.5,
             marker=".",
             markersize=6,
             markeredgewidth=2,
             label="42 CPUs " + operation)
    plt.plot(sizes,
             gpu,
             color="black",
             marker=".",
             markersize=6,
             markeredgewidth=2,
             label="6 GPUs " + operation)
    plt.plot(sizes,
             gpu_24,
             color="black",
             marker=".",
             markersize=6,
             markeredgewidth=2,
             linestyle="dotted",
             # raw string: "\c" is not a valid escape in a normal literal
             label="$" + latency_label + r"\cdot10^{-6}$ latency")

    plt.title(operation + " performance without calculated latency",
              fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="lower right", fontsize=12, frameon=False)
    plt.xscale('log')
    plt.yscale('log')
    plt.ylim(top=1000000)
    plt.tight_layout()

    plt.savefig("../plots/" + operation + "_synthetic_latency.png")
    if show:
        plt.show()
    plt.gcf().clear()
コード例 #8
0
def jed_cpu_vs_gpu(operation, count, show):
    """Plot CPU and GPU rates against execution time instead of vector size.

    For each vector size the run time and rate are read for the operation,
    for VecCopy, and for the copy to the GPU (pinned-memory logs).  Every
    curve is drawn on the left log-log axis with time on the x axis.  The
    figure is saved to ``../plots/jed_<operation>_CPU_vs_GPU.png``.

    Args:
        operation: Event name, ``"VecDot"`` or ``"VecAXPY"``.  It selects the
            GPU log files and the scale applied to the copy rates.
        count: Passed to ``ut.get_time`` and ``ut.get_floprate`` as their
            last argument.
        show: When true, display the figure after saving it.

    Raises:
        ValueError: If ``operation`` is not one of the supported events.
    """
    # GPU log prefix, run-number suffix and memory scale per operation.
    settings = {
        "VecDot": ("../data/waitforgpu/vec_ops.n6_g1_c2_a1.", ".718559", 1),
        "VecAXPY": ("../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1.",
                    ".668627", 1.5),
    }
    if operation not in settings:
        # Previously this surfaced as a NameError on `mem_scale`.
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'"
            .format(operation))
    gpu_prefix, gpu_run, mem_scale = settings[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    gpu = []
    cpu = []

    gpu_time = []
    cpu_time = []

    gpu_VecCopy = []
    cpu_VecCopy = []

    gpu_VecCopy_time = []
    cpu_VecCopy_time = []

    gpu_ToGpu = []
    gpu_ToGpu_time = []

    # Copy rates are divided by mem_scale before plotting.
    copy_scale = 2 / mem_scale
    to_gpu_scale = 1 / mem_scale

    for size in gpu_sizes:
        # operation time and floprate
        gpu_log = gpu_prefix + str(size) + gpu_run
        gpu_time.append(ut.get_time(gpu_log, operation, count))
        gpu.append(float(ut.get_floprate(gpu_log, operation, False, count)))

        # GPU copy time and bandwidth
        elapsed = ut.get_time(
            "../data/figures-2-7-8-9/vec_ops.n6_g1_c2_a1." + str(size) +
            ".668627", "VecCopy", 1)
        gpu_VecCopy_time.append(elapsed)
        gpu_VecCopy.append(copy_scale * ut.calc_rate(size, elapsed))

        # Copy-to-GPU time and bandwidth, pinned memory.  The run number
        # depends on the vector size; the original compared the whole
        # gpu_sizes list to the threshold, which is always False on Python 2
        # and a TypeError on Python 3.
        run_num = ".732319" if size <= 100000 else ".715071"
        elapsed = ut.get_time(
            "../data/pinned/vec_ops.n6_g1_c7_a1." + str(size) + run_num,
            "VecCUDACopyTo", 1)
        gpu_ToGpu_time.append(elapsed)
        gpu_ToGpu.append(to_gpu_scale * ut.calc_rate(size, elapsed))

    for size in cpu_sizes:
        # CPU operation time and floprate.  An earlier data set lived in
        # ../data/vec-ops with run suffix .654910.
        cpu_log = "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p42." + str(size)
        cpu_time.append(ut.get_time(cpu_log, operation, count))
        cpu.append(float(ut.get_floprate(cpu_log, operation, True, count)))

        # CPU copy time and bandwidth
        elapsed = ut.get_time(cpu_log, "VecCopy", 1)
        cpu_VecCopy_time.append(elapsed)
        cpu_VecCopy.append(copy_scale * ut.calc_rate(size, elapsed))

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    gpu_color = cm(0.0)
    cpu_color = cm(0.5)
    to_gpu_color = cm(0.75)

    left.plot(cpu_time, cpu, color=cpu_color,
              label="42 CPU cores " + operation)
    left.plot(cpu_VecCopy_time, cpu_VecCopy, color=cpu_color,
              linestyle="dashed", label="42 CPU cores VecCopy")
    left.plot(gpu_ToGpu_time, gpu_ToGpu, color=to_gpu_color,
              linestyle="dashed", label="6 GPUs copy to GPU")
    left.plot(gpu_time, gpu, color=gpu_color, label="6 GPUs " + operation)
    left.plot(gpu_VecCopy_time, gpu_VecCopy, color=gpu_color,
              linestyle="dashed", label="6 GPUs VecCopy")

    left.set_title("GPU vs CPU " + operation + " performance", fontsize=12)
    left.set_xlabel("Execution time (seconds)", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    left.set_ylim([7, 1000000])
    right.set_ylim([7, 1000000 * mem_scale])
    plt.xlim([1e-6, .2])
    # Lay out before saving so the written image matches what is shown.
    plt.tight_layout()
    plt.savefig("../plots/jed_" + operation + "_CPU_vs_GPU.png")
    if show:
        plt.show()
コード例 #9
0
def virtualization_plot(operation, count, ylims, show):
    """Plot the flop rate of a vector operation for 1 to 8 MPI ranks on 1 GPU.

    One marker series is drawn per vector size (10^4 through 10^7), read from
    ``../data/vec-ops`` logs with run suffix ``.654911``.  The figure is
    saved to ``../plots/<operation>_virtualization_1_GPU.png`` and then
    cleared.

    Args:
        operation: Event name passed to ``ut.get_floprate`` and used in the
            title and output file name.
        count: Passed to ``ut.get_floprate`` as its last argument.
        ylims: Passed to ``plt.ylim`` to set the y-axis limits.
        show: When true, display the figure after saving it.
    """
    ranks = range(1, 9)
    # Powers of ten of the plotted vector sizes.  The original also read the
    # 10^3 logs but never plotted them, so that read is dropped.
    exponents = [4, 5, 6, 7]

    series = []
    for exponent in exponents:
        size = 10**exponent
        series.append([
            float(
                ut.get_floprate(
                    "../data/vec-ops/vec_ops.n1_g1_c42_a" + str(rank) + "." +
                    str(size) + ".654911", operation, False, count))
            for rank in ranks
        ])

    # plot
    num = len(exponents)
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces set_color_cycle, which newer matplotlib
    # releases no longer provide.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])

    for exponent, flop_rates in zip(exponents, series):
        ax.plot(ranks,
                flop_rates,
                marker="o",
                markersize=6,
                markeredgewidth=2,
                linestyle="none",
                label="$10^" + str(exponent) + "$")

    plt.title(operation + " virtualization performance on 1 GPU", fontsize=12)
    plt.xlabel("MPI ranks", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper right", fontsize=12, ncol=4, frameon=False)
    plt.xlim([0.5, 8.5])
    plt.ylim(ylims)
    plt.tight_layout()
    # Show the y tick values as integers with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])

    plt.savefig("../plots/" + operation + "_virtualization_1_GPU.png")
    if show:
        plt.show()
    plt.gcf().clear()
コード例 #10
0
def cpu_vs_gpu(operation, count, clear, show):
    """Plot CPU flop rates of a vector operation for several cache set-ups.

    Four curves are drawn on log-log axes against the vector size: cleared
    cache, half cleared cache, the December run and uncleared cache.  The
    first December rate is printed.  The figure is not written to disk (the
    ``savefig`` call is disabled).

    Args:
        operation: Event name, ``"VecDot"`` or ``"VecAXPY"``.
        count: Passed to ``ut.get_floprate`` as its last argument.
        clear: Unused; kept so existing callers keep working.
        show: When true, display the figure.

    Raises:
        ValueError: If ``operation`` is not one of the supported events.
    """
    mem_scales = {"VecDot": 1, "VecAXPY": 1.5}
    if operation not in mem_scales:
        # Previously this surfaced as an UnboundLocalError on `scale`.
        raise ValueError(
            "unsupported operation {!r}; expected 'VecDot' or 'VecAXPY'"
            .format(operation))
    mem_scale = mem_scales[operation]
    copy_scale = 2 / mem_scale  # VecCopy

    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    def log_name(directory, size, run_num):
        return ("../data/" + directory + "/vec_ops.n2_g0_c21_p42." +
                str(size) + run_num)

    def flop_rate(directory, size, run_num):
        return float(
            ut.get_floprate(log_name(directory, size, run_num), operation,
                            True, count))

    def copy_rate(directory, size, run_num):
        elapsed = ut.get_time(log_name(directory, size, run_num), "VecCopy", 1)
        return copy_scale * ut.calc_rate(size, elapsed)

    cpu_flush = []
    cpu_half_flush = []
    cpu_no_flush = []
    cpu_permute = []
    cpu_flush_vecset = []
    cpu_half_flush_december = []

    cpu_VecCopy_flush = []
    cpu_VecCopy_half_flush = []
    cpu_VecCopy_no_flush = []

    # Every series is still loaded, including those only used by the
    # optional curves listed further down (cpu_flush_vecset is used by none).
    for size in cpu_sizes:
        cpu_flush.append(flop_rate("cpu-flush-cache", size, ".767597"))
        cpu_half_flush.append(flop_rate("vec-ops", size, ".654910"))
        cpu_half_flush_december.append(
            flop_rate("vec-ops-december", size, ".795805"))
        cpu_no_flush.append(flop_rate("cpu-no-flush-cache", size, ".767590"))
        cpu_flush_vecset.append(
            flop_rate("cpu-flush-cache-vecset", size, ".792547"))
        cpu_permute.append(flop_rate("permute-operations", size, ".792549"))

        cpu_VecCopy_flush.append(copy_rate("cpu-flush-cache", size, ".767597"))
        cpu_VecCopy_half_flush.append(copy_rate("vec-ops", size, ".654910"))
        cpu_VecCopy_no_flush.append(
            copy_rate("cpu-no-flush-cache", size, ".767590"))

    # Call form of print works on both Python 2 and Python 3.
    print(cpu_half_flush_december[0])

    # plot
    fig, left = plt.subplots()
    right = left.twinx()
    cm = plt.get_cmap('inferno')
    cache_color = cm(0.5)

    left.plot(cpu_sizes, cpu_flush, color=cache_color,
              label=operation + " cleared cache")
    left.plot(cpu_sizes, cpu_half_flush, color=cache_color,
              linestyle="dashed", label=operation + " half cleared cache")
    left.plot(cpu_sizes, cpu_half_flush_december, color="black",
              label=operation + " December")
    left.plot(cpu_sizes, cpu_no_flush, color=cache_color,
              linestyle="dotted", label=operation + " uncleared cache")
    # Optional curves, currently disabled:
    # left.plot(cpu_sizes, cpu_permute, color="black", label=operation + " rearrange operations")
    # left.plot(cpu_sizes, cpu_VecCopy_flush, color=cm(0.0), label="VecCopy cleared cache")
    # left.plot(cpu_sizes, cpu_VecCopy_half_flush, color=cm(0.0), linestyle="dashed", label="VecCopy half cleared cache")
    # left.plot(cpu_sizes, cpu_VecCopy_no_flush, color=cm(0.0), linestyle="dotted", label="VecCopy uncleared cache")

    plt.xlim([500, 2000000000])
    left.set_title("CPU " + operation + " cache performance", fontsize=12)
    left.set_xlabel("Vector size", fontsize=12)
    left.set_ylabel("MFlops/second", fontsize=12)
    right.set_ylabel("8 MBytes/second", fontsize=12)
    left.legend(loc="lower right", fontsize=12, ncol=1, frameon=False)
    plt.legend(loc="upper left", fontsize=12, ncol=1, frameon=False)
    left.set_xscale('log')
    left.set_yscale('log')
    right.set_yscale('log')
    top_ = 1000000
    left.set_ylim(top=top_)
    right.set_ylim(top=top_ * mem_scale)
    right.set_ylim(bottom=20)
    left.set_ylim(bottom=20)
    plt.tight_layout()

    # plt.savefig("../plots/" + operation + "_CPU_cleared_cache.png")
    if show:
        plt.show()
コード例 #11
0
def cpu_gpu_flops(operation, count, normed, ncols, show):
    """Plot CPU and GPU flop rates of one vector operation against vector size.

    One curve is drawn per CPU core count (7, 14, 21, 28, 35, 42) and per GPU
    count (1, 2, 3, 6).  Every point is obtained from ``ut.get_floprate`` on a
    result file below ``../data/``.  The figure is always written to
    ``../plots/CPU_GPU_<operation>_flops[_norm].png``.

    Args:
        operation: Name of the operation to plot; must be ``"VecDot"`` or
            ``"VecAXPY"`` (these are the only ones with a known GPU data
            directory and run number).
        count: Forwarded unchanged as the fourth argument of
            ``ut.get_floprate``.
            NOTE(review): its meaning is defined by that helper -- confirm.
        normed: If true, divide each curve by its core/GPU count, and add
            ``_norm`` to the output file name.
        ncols: Number of columns in the legend.
        show: If true, display the figure with ``plt.show()`` after saving.

    Raises:
        ValueError: If ``operation`` is not one of the supported names.
    """
    # GPU result files live in an operation-specific directory and carry an
    # operation-specific run-number suffix.
    gpu_sources = {
        "VecDot": ("waitforgpu", ".718559"),
        "VecAXPY": ("figures-2-7-8-9", ".668627"),
    }
    if operation not in gpu_sources:
        # Fail early with a clear message instead of hitting unbound
        # `path`/`run_num` variables further down.
        raise ValueError(
            "unsupported operation {!r}; expected one of {}".format(
                operation, sorted(gpu_sources)))
    path, run_num = gpu_sources[operation]

    gpu_sizes = [
        1000, 10000, 100000, 1000000, 2000000, 4000000, 6000000, 8000000,
        10000000, 20000000, 40000000, 60000000, 80000000, 100000000, 1000000000
    ]
    cpu_sizes = [1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000]

    gpu_counts = [1, 2, 3, 6]
    cpu_counts = [7, 14, 21, 28, 35, 42]

    # One list of flop rates per GPU count, in the order of gpu_counts.
    gpu_rates = []
    for gpus in gpu_counts:
        prefix = "../data/" + path + "/vec_ops.n" + str(gpus) + "_g1_c2_a1."
        rates = [
            float(
                ut.get_floprate(prefix + str(size) + run_num, operation,
                                False, count)) for size in gpu_sizes
        ]
        if normed:
            rates = [rate / gpus for rate in rates]
        gpu_rates.append(rates)

    # CPU results are read from the cpu-flush-cache directory; those file
    # names end with the vector size (no run-number suffix).
    cpu_rates = []
    for cores in cpu_counts:
        prefix = "../data/cpu-flush-cache/vec_ops.n2_g0_c21_p" + str(
            cores) + "."
        rates = [
            float(ut.get_floprate(prefix + str(size), operation, True, count))
            for size in cpu_sizes
        ]
        if normed:
            rates = [rate / cores for rate in rates]
        cpu_rates.append(rates)

    # plot
    num = 10
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces Axes.set_color_cycle, which no longer exists in
    # current matplotlib releases.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])

    # CPU curves are drawn first so they take the first colours of the cycle.
    for cores, rates in zip(cpu_counts, cpu_rates):
        ax.plot(cpu_sizes,
                rates,
                marker="o",
                markersize=4,
                markeredgewidth=2,
                label="{} CPU cores".format(cores))

    for gpus, rates in zip(gpu_counts, gpu_rates):
        ax.plot(gpu_sizes,
                rates,
                marker="o",
                markersize=4,
                markeredgewidth=2,
                label="1 GPU" if gpus == 1 else "{} GPUs".format(gpus))

    plt.title(operation + " performance", fontsize=12)
    plt.xlabel("Vector size", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper left", fontsize=12, ncol=ncols, frameon=False)
    plt.tight_layout()
    plt.xscale('log')
    # Show y tick values as integers with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])

    plt.savefig("../plots/CPU_GPU_" + operation + "_flops" +
                ("_norm" if normed else "") + ".png")
    if show:
        plt.show()
コード例 #12
0
def gpu_virtualization(operation, count, height, show):
    """Plot GPU flop rates of one vector operation against the MPI rank count.

    For vectors of size 10^8, one series is drawn per GPU count (1, 2, 3, 6).
    Each point comes from ``ut.get_floprate`` on a result file whose name
    contains an ``a<k>`` index; the point is placed at ``k * <GPU count>`` on
    the x axis, which is labelled "MPI ranks".  The figure is always written
    to ``../plots/<operation>_virtualization_10^8.png``.

    Args:
        operation: Name of the operation to plot; must be ``"VecDot"`` or
            ``"VecAXPY"`` (the only ones with known data files).
        count: Forwarded unchanged as the fourth argument of
            ``ut.get_floprate``.
            NOTE(review): its meaning is defined by that helper -- confirm.
        height: Upper limit of the y axis.
        show: If true, display the figure with ``plt.show()`` after saving.

    Raises:
        ValueError: If ``operation`` is not one of the supported names.
    """
    # Per operation: data directory and the run-number suffix of the result
    # files for each GPU count.
    sources = {
        "VecDot": ("waitforgpu", {
            1: ".718553",
            2: ".718554",
            3: ".718555",
            6: ".718556"
        }),
        "VecAXPY": ("vec-ops", {
            1: ".654911",
            2: ".654912",
            3: ".654913",
            6: ".654914"
        }),
    }
    if operation not in sources:
        # Without this check every series would stay empty and plotting would
        # fail later with an unrelated length-mismatch error.
        raise ValueError(
            "unsupported operation {!r}; expected one of {}".format(
                operation, sorted(sources)))
    directory, run_numbers = sources[operation]

    size = 100000000
    size_str = "10^8"

    # (GPU count, value of the c<..> field in the file name, highest a<..>
    # index that is read).
    # NOTE(review): the 1-GPU series stops at a41 although the other series
    # run up to their c value -- confirm whether a42 is intentionally omitted.
    layouts = [(1, 42, 41), (2, 21, 21), (3, 14, 14), (6, 7, 7)]

    series = []
    for gpus, cores, last_index in layouts:
        indices = range(1, last_index + 1)
        prefix = ("../data/" + directory + "/vec_ops.n" + str(gpus) + "_g1_c" +
                  str(cores) + "_a")
        suffix = "." + str(size) + run_numbers[gpus]
        rates = [
            float(
                ut.get_floprate(prefix + str(index) + suffix, operation,
                                False, count)) for index in indices
        ]
        # x and y are derived from the same index list, so their lengths
        # always agree.
        ranks = [gpus * index for index in indices]
        series.append((gpus, ranks, rates))

    # plot
    num = 4
    cm = plt.get_cmap('inferno')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # set_prop_cycle replaces Axes.set_color_cycle, which no longer exists in
    # current matplotlib releases.
    ax.set_prop_cycle(color=[cm((1. * i) / num) for i in range(num)])

    for gpus, ranks, rates in series:
        ax.plot(ranks,
                rates,
                marker="o",
                markersize=6,
                markeredgewidth=2,
                linestyle="none",
                label="1 GPU" if gpus == 1 else "{} GPUs".format(gpus))

    plt.title(operation + " virtualization performance", fontsize=12)
    plt.xlabel("MPI ranks", fontsize=12)
    plt.ylabel("MFlops/second", fontsize=12)
    plt.legend(loc="upper right", ncol=2, fontsize=12, frameon=False)
    plt.xlim([0, 43])
    plt.ylim(top=height)
    plt.tight_layout()
    # Show y tick values as integers with thousands separators.
    ax.set_yticklabels(
        ['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])

    plt.savefig("../plots/" + operation + "_virtualization_" + size_str +
                ".png")
    if show:
        plt.show()