Beispiel #1
0
def build_ispc(code):
    """Build *code* as an ISPC shared library and return the library path.

    The contents of ``tasksys.cpp`` (read from the current working
    directory) are handed to ``build_ispc_shared_lib`` alongside *code*,
    which is passed under the file name ``my.ispc``.  The build directory is
    created with :func:`tempfile.mkdtemp` and is intentionally not removed
    here, because the returned path points into it.

    :arg code: ISPC source text to compile.
    :returns: the path ``<build dir>/shared.so``.
    """
    with open("tasksys.cpp", "r") as tasksys_file:
        task_system = tasksys_file.read()

    build_dir = tempfile.mkdtemp()

    cxx_flags = ["-g", "-fopenmp", "-DISPC_USE_OMP"]

    # Flags that were previously toggled here and are currently disabled:
    # --math-lib=fast, --opt=fast-math, --opt=disable-fma, and
    # --addressing=64 (when INDEX_DTYPE is np.int64).
    ispc_flags = [
        "-g",
        "--no-omit-frame-pointer",
        "--target=" + ISPC_TARGET,
        "--opt=force-aligned-memory",
        "--opt=disable-loop-unroll",
    ]

    build_ispc_shared_lib(
        build_dir,
        [("my.ispc", code)],
        [("tasksys.cpp", task_system)],
        cxx_options=cxx_flags,
        ispc_options=ispc_flags,
        quiet=False,
    )

    return os.path.join(build_dir, "shared.so")
def main():
    """Build the STREAM init and triad kernels for ISPC and time the triad.

    Two kernels are created with ``lp.make_kernel`` for ``lp.ISPCTarget``,
    their generated code is compiled together with ``tasksys.cpp`` (read from
    the current working directory) into a shared library, and that library is
    loaded through :mod:`ctypes`.  After the init kernel and two warm-up
    calls, the triad kernel is invoked ``NRUNS`` times; the average time per
    call and the resulting bandwidth are printed.

    :raises AssertionError: if ``address_from_numpy`` reports an address that
        is not a multiple of ``ALIGN_TO``, or if the triad result differs
        from ``b + scalar*c`` by more than ten machine epsilons.
    """
    with open("tasksys.cpp") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, arg_names):
        # One 1D kernel over i in [0, n), named stream_<name>_tasks so that
        # it can be looked up by that symbol in the compiled library.
        knl = lp.make_kernel(
                "{[i]: 0<=i<n}",
                insn,
                target=lp.ISPCTarget(), index_dtype=INDEX_DTYPE,
                name="stream_"+name+"_tasks")

        return transform(knl, arg_names, STREAM_DTYPE)

    init_knl = make_knl("init", """
                a[i] = 1
                b[i] = 2
                c[i] = 0
                """, "a,b,c")
    triad_knl = make_knl("triad", """
            a[i] = b[i] + scalar * c[i]
            """, "a,b,c,scalar")

    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(triad_knl)
        print(ispc_code)

        build_ispc_shared_lib(
                tmpdir,
                [("stream.ispc", ispc_code)],
                [("tasksys.cpp", tasksys_source)],
                cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
                ispc_options=([
                    "--target=avx2-i32x8",
                    "--opt=force-aligned-memory",
                    "--opt=disable-loop-unroll",
                    ]
                    # 64-bit indices need 64-bit addressing in ISPC.
                    + (["--addressing=64"] if INDEX_DTYPE == np.int64 else [])
                    ),
                quiet=False,
                )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5

        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        print(
                hex(address_from_numpy(a)),
                hex(address_from_numpy(b)),
                hex(address_from_numpy(c)))
        # The library is built with --opt=force-aligned-memory, so check the
        # alignment of every array before handing it to the kernels.
        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        knl_lib.stream_init_tasks(
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                INDEX_CTYPE(ARRAY_SIZE),
                )

        def call_kernel():
            knl_lib.stream_triad_tasks(
                    cptr_from_numpy(a),
                    cptr_from_numpy(b),
                    cptr_from_numpy(c),
                    STREAM_CTYPE(scalar),
                    INDEX_CTYPE(ARRAY_SIZE),
                    )

        # Two untimed warm-up calls.
        call_kernel()
        call_kernel()

        start_time = time()

        for _ in range(NRUNS):
            call_kernel()

        elapsed = time() - start_time

        print(elapsed/NRUNS)

        # Each triad call touches three arrays (reads b and c, writes a).
        print(1e-9*3*a.nbytes*NRUNS/elapsed, "GB/s")

        # Verify against the triad definition a = b + scalar*c.  The previous
        # residual a - b + scalar*c had the wrong sign on the last term and
        # only passed because c is initialized to zero.
        expected = b + scalar*c
        assert la.norm(a - expected, np.inf) < np.finfo(STREAM_DTYPE).eps * 10
Beispiel #3
0
def run(approx, code, size, NRUNS):
    """Build the ISPC kernel in *code*, evaluate it on a grid and time it.

    The source is compiled together with ``tests/tasksys.cpp`` into a shared
    library whose ``eval`` symbol is called through :mod:`ctypes`.  The
    kernel is run ten times untimed, the result is plotted, and then the
    kernel is run ``NRUNS`` more times under a timer.

    :arg approx: object describing the approximation.  This function reads
        ``dtype_name``, ``lower_bound``, ``upper_bound``, ``optimizations``
        and ``max_order``.  The attributes ``tree_1d``, ``map``, ``mid``,
        ``left``, ``right``, ``interval_a``, ``interval_b`` and ``coeff`` are
        replaced **on the object** by NumPy arrays of the working dtype.
    :arg code: ISPC source text providing the ``eval`` entry point.
    :arg size: number of evaluation points between the two bounds.
    :arg NRUNS: number of timed kernel invocations.
    :returns: the output array ``y`` filled by the kernel.
    :raises ValueError: if ``approx.dtype_name`` is neither ``"float"`` nor
        ``"double"``.
    """
    # Pick the value dtype and the matching index types.  Unknown names are
    # rejected here instead of failing later with a NameError.
    if approx.dtype_name == "float":
        dt = np.float32
        index_dtype = np.int32
        index_ctype = ctypes.c_int
    elif approx.dtype_name == "double":
        dt = np.float64
        index_dtype = np.int64
        index_ctype = ctypes.c_longlong
    else:
        raise ValueError(
            "unsupported dtype_name %r (expected 'float' or 'double')"
            % (approx.dtype_name,))

    with open("tests/tasksys.cpp", "r") as ts_file:
        tasksys_source = ts_file.read()

    with TemporaryDirectory() as tmpdir:
        print(code)

        ispc_options = [
            "-g",
            "--no-omit-frame-pointer",
            "--arch=x86-64",
            "--target=avx2",
            "--opt=disable-loop-unroll",
            "--opt=disable-fma",
            "--woff",
        ]
        if index_dtype == np.int64:
            # 64-bit indices need 64-bit addressing in ISPC.
            ispc_options.append("--addressing=64")

        build_ispc_shared_lib(
            tmpdir,
            [("stream.ispc", code)],
            [("tasksys.cpp", tasksys_source)],
            cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
            ispc_options=ispc_options,
            # NOTE(review): machine-specific compiler location.
            ispc_bin="/home/ubuntu-boot/Desktop/ispc-v1.9.1-linux/ispc",
            quiet=True,
        )

        x = np.linspace(approx.lower_bound, approx.upper_bound, size, dtype=dt)
        y = np.zeros(size, dtype=dt)

        # Convert every table on the approximation object to the working
        # dtype so the pointers passed below refer to data of that type.
        for attr in ("tree_1d", "map", "mid", "left", "right",
                     "interval_a", "interval_b", "coeff"):
            setattr(approx, attr, np.array(getattr(approx, attr), dtype=dt))

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        def call_kernel():
            # The argument list of eval depends on whether the 'map'
            # optimization is enabled.
            if 'map' in approx.optimizations:
                knl_lib.eval(
                    cptr_from_numpy(approx.mid),
                    cptr_from_numpy(approx.left),
                    cptr_from_numpy(approx.right),
                    cptr_from_numpy(approx.interval_a),
                    cptr_from_numpy(approx.interval_b),
                    cptr_from_numpy(approx.coeff),
                    cptr_from_numpy(approx.map),
                    cptr_from_numpy(x),
                    cptr_from_numpy(y),
                    index_ctype(size),
                )
            else:
                knl_lib.eval(
                    cptr_from_numpy(approx.tree_1d),
                    cptr_from_numpy(x),
                    cptr_from_numpy(y),
                    index_ctype(size),
                )

        # Untimed warm-up calls.
        for _ in range(10):
            call_kernel()

        # Plot every 2048th point of the result; this happens before the
        # timer starts, so it is not part of the measurement.
        plt.figure()
        plt.plot(x[::2048], y[::2048])
        plt.show()

        start_time = time.perf_counter()
        for _ in range(NRUNS):
            call_kernel()
        elapsed = time.perf_counter() - start_time

        avg_runtime = elapsed / NRUNS
        print("Average Runtime:", avg_runtime)

        # Flop estimate per evaluation point, scaled by 2**30 for one run.
        flops_per_point = 4 + 2 + 2 * (approx.max_order - 2)
        gflop_per_run = flops_per_point * size / (2**30)
        # Divide by the time of ONE run; dividing by the total elapsed time
        # under-reported the rate by a factor of NRUNS.
        print(gflop_per_run / avg_runtime, "GFLOPS/s")
        return y
Beispiel #4
0
def main():
    """Compile the STREAM init/triad kernels with ISPC and benchmark triad.

    The kernels are generated through ``lp.make_kernel`` with
    ``lp.ISPCTarget``, built into a shared library together with
    ``tasksys.cpp`` from the current working directory, and called via
    :mod:`ctypes`.  The init kernel runs once, the triad kernel is warmed up
    twice and then timed over ``NRUNS`` calls.  The mean time per call and
    the bandwidth in GB/s are printed.

    :raises AssertionError: if ``address_from_numpy`` reports an address that
        is not a multiple of ``ALIGN_TO``, or if the computed ``a`` deviates
        from ``b + scalar*c`` by more than ten machine epsilons.
    """
    with open("tasksys.cpp", "r") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, arg_names):
        # Kernels are named stream_<name>_tasks; the same names are used
        # below to look the functions up in the compiled library.
        knl = lp.make_kernel(
                "{[i]: 0<=i<n}",
                insn,
                target=lp.ISPCTarget(), index_dtype=INDEX_DTYPE,
                name="stream_"+name+"_tasks")

        return transform(knl, arg_names, STREAM_DTYPE)

    init_knl = make_knl("init", """
                a[i] = 1
                b[i] = 2
                c[i] = 0
                """, "a,b,c")
    triad_knl = make_knl("triad", """
            a[i] = b[i] + scalar * c[i]
            """, "a,b,c,scalar")

    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(triad_knl)
        print(ispc_code)

        ispc_options = [
                "--target=avx2-i32x8",
                "--opt=force-aligned-memory",
                "--opt=disable-loop-unroll",
                ]
        if INDEX_DTYPE == np.int64:
            # 64-bit indices need 64-bit addressing in ISPC.
            ispc_options.append("--addressing=64")

        build_ispc_shared_lib(
                tmpdir,
                [("stream.ispc", ispc_code)],
                [("tasksys.cpp", tasksys_source)],
                cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
                ispc_options=ispc_options,
                quiet=False,
                )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5

        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        print(
                hex(address_from_numpy(a)),
                hex(address_from_numpy(b)),
                hex(address_from_numpy(c)))
        # --opt=force-aligned-memory is in effect, so every array must start
        # on an ALIGN_TO boundary.
        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        knl_lib.stream_init_tasks(
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                INDEX_CTYPE(ARRAY_SIZE),
                )

        def call_kernel():
            knl_lib.stream_triad_tasks(
                    cptr_from_numpy(a),
                    cptr_from_numpy(b),
                    cptr_from_numpy(c),
                    STREAM_CTYPE(scalar),
                    INDEX_CTYPE(ARRAY_SIZE),
                    )

        # Warm up before timing.
        call_kernel()
        call_kernel()

        start_time = time()

        for _ in range(NRUNS):
            call_kernel()

        elapsed = time() - start_time

        print(elapsed/NRUNS)

        # Three arrays are streamed per triad call: b and c in, a out.
        print(1e-9*3*a.nbytes*NRUNS/elapsed, "GB/s")

        # The triad computes a = b + scalar*c, so the residual is
        # a - (b + scalar*c).  The earlier form a - b + scalar*c flipped the
        # sign of the last term and was only correct because c is all zeros.
        residual = a - (b + scalar*c)
        assert la.norm(residual, np.inf) < np.finfo(STREAM_DTYPE).eps * 10
def main(experiment):
    """Build and time one STREAM-style ISPC kernel selected by *experiment*.

    An init kernel and the benchmark kernel are generated with
    ``lp.make_kernel`` for ``lp.ISPCTarget``, compiled together with
    ``tests/tasksys.cpp`` into a shared library and loaded via
    :mod:`ctypes`.  The benchmark kernel is called seven times untimed and
    then ``NRUNS`` times under a timer; the average time per call and the
    bandwidth in GB/s are printed.

    :arg experiment: ``"triad"`` (``a[i] = b[i] + scalar * c[i]``) or
        ``"copy"`` (``a[i] = b[i]``).
    :raises ValueError: if *experiment* is neither ``"triad"`` nor
        ``"copy"``.
    :raises AssertionError: if ``address_from_numpy`` reports an address that
        is not a multiple of ``ALIGN_TO``.
    """
    # Validate once and branch on a single flag everywhere below.  Previously
    # kernel construction tested for "triad" while code generation and
    # argument setup tested for "copy", so any other value built the copy
    # kernels and then failed on an unbound triad kernel.
    if experiment not in ("triad", "copy"):
        raise ValueError(
            "unknown experiment %r (expected 'triad' or 'copy')"
            % (experiment,))
    is_triad = experiment == "triad"

    print()
    print("Task: ", experiment)
    with open("tests/tasksys.cpp", "r") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, arg_names):
        # Kernels are named stream_<name>_tasks; the same names are used
        # below to look the functions up in the compiled library.
        knl = lp.make_kernel("{[i]: 0<=i<n}",
                             insn,
                             target=lp.ISPCTarget(),
                             index_dtype=INDEX_DTYPE,
                             name="stream_" + name + "_tasks")

        return transform(knl, arg_names, STREAM_DTYPE)

    if is_triad:
        init_knl = make_knl(
            "init", """
                    a[i] = 1
                    b[i] = 3
                    c[i] = 0
                    scalar = 7
                    """, "a,b,c,scalar")
        bench_knl = make_knl(
            "triad", """
                a[i] = b[i] + scalar * c[i]
                """, "a,b,c,scalar")
    else:
        init_knl = make_knl(
            "init", """
                    a[i] = 1
                    b[i] = 9
                    """, "a,b")
        bench_knl = make_knl("copy", """
            a[i] = b[i]
            """, "a,b")

    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(bench_knl)
        print(ispc_code)

        build_ispc_shared_lib(
            tmpdir,
            [("stream.ispc", ispc_code)],
            [("tasksys.cpp", tasksys_source)],
            cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
            ispc_options=([
                "-g",
                "-O0",
                "--no-omit-frame-pointer",
                "--target=avx2-i32x8",
                "--opt=disable-loop-unroll",
                "--opt=disable-fma",
                "--addressing=32",
            ]),
            # NOTE(review): machine-specific compiler location.
            ispc_bin="/home/ubuntu-boot/Desktop/ispc-v1.9.1-linux/ispc",
            quiet=True,
        )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5
        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        # NOTE(review): the init kernel is compiled into the library but is
        # never called here, so the arrays are used exactly as returned by
        # empty_aligned - confirm that this is intended.
        if is_triad:
            kernel = knl_lib.stream_triad_tasks
            kernel_args = [
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                STREAM_CTYPE(scalar),
                INDEX_CTYPE(ARRAY_SIZE),
            ]
        else:
            kernel = knl_lib.stream_copy_tasks
            kernel_args = [
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                INDEX_CTYPE(ARRAY_SIZE),
            ]

        def call_kernel():
            kernel(*kernel_args)

        # Untimed warm-up (the original issued 4 + 3 calls in two loops).
        for _ in range(7):
            call_kernel()

        start_time = time()
        for _ in range(NRUNS):
            call_kernel()
        elapsed = time() - start_time

        print("Avg Time: ", elapsed / NRUNS)
        # Triad streams three arrays per call, copy streams two.
        arrays_touched = 3 if is_triad else 2
        print("MB: ", 1e-9 * arrays_touched * a.nbytes * NRUNS / elapsed,
              "GB/s")