def call_kernel():
    """Invoke the compiled ``stream_triad_tasks`` kernel once.

    The arrays ``a``, ``b`` and ``c`` are wrapped with ``cptr_from_numpy`` on
    every call, followed by ``scalar`` as a ``STREAM_CTYPE`` and
    ``ARRAY_SIZE`` as an ``INDEX_CTYPE``.
    """
    triad = knl_lib.stream_triad_tasks
    pointers = [cptr_from_numpy(array) for array in (a, b, c)]
    triad(*pointers, STREAM_CTYPE(scalar), INDEX_CTYPE(ARRAY_SIZE))
Example #2
0
 def call_kernel():
     """Run a single pass of the ``stream_triad_tasks`` kernel.

     Arguments, in order: C pointers for ``a``, ``b`` and ``c`` (obtained via
     ``cptr_from_numpy``), ``scalar`` converted to ``STREAM_CTYPE``, and
     ``ARRAY_SIZE`` converted to ``INDEX_CTYPE``.
     """
     kernel = knl_lib.stream_triad_tasks
     ptr_a = cptr_from_numpy(a)
     ptr_b = cptr_from_numpy(b)
     ptr_c = cptr_from_numpy(c)
     factor = STREAM_CTYPE(scalar)
     length = INDEX_CTYPE(ARRAY_SIZE)
     kernel(ptr_a, ptr_b, ptr_c, factor, length)
Example #3
0
def main():
    """Compile ``ISPC_CODE``, load the result and call its ``scale`` routine.

    Two ``float32`` arrays of ``2**20`` entries are requested from
    ``empty_aligned`` with ``n=4096``; ``b`` is filled with pi, ``a`` is not
    written to here.  Both are handed to ``scale`` together with the factor
    15 (as ``c_float``) and the entry count (as ``c_int``).
    """
    lib = ctypes.cdll.LoadLibrary(build_ispc(ISPC_CODE))

    count = 2**20
    page_bytes = 4096  # one page

    a = empty_aligned(count, dtype=np.float32, n=page_bytes)
    b = empty_aligned(count, dtype=np.float32, n=page_bytes)
    b.fill(np.pi)

    scale = lib.scale
    call_args = (
        cptr_from_numpy(a),
        cptr_from_numpy(b),
        ctypes.c_float(15),
        ctypes.c_int(count),
    )
    scale(*call_args)
def main():
    """Build, run, time and verify the ISPC STREAM "triad" benchmark.

    Steps:

    1. Read ``tasksys.cpp`` from the current working directory.
    2. Create the ``stream_init_tasks`` and ``stream_triad_tasks`` kernels,
       generate ISPC code for both and compile them into ``shared.so``
       inside a temporary directory.
    3. Allocate three arrays of ``ARRAY_SIZE`` entries, check their
       alignment against ``ALIGN_TO`` and initialise them with the init
       kernel.
    4. Call the triad kernel twice as warm-up, then ``NRUNS`` times under a
       timer, printing the average time per run and the bandwidth.
    5. Check that ``a == b + scalar*c`` up to a small tolerance.

    Raises:
        OSError: if ``tasksys.cpp`` cannot be opened.
        AssertionError: if an array is misaligned or the triad result is
            wrong.
    """
    with open("tasksys.cpp") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, var_names):
        """Create the ISPC kernel ``stream_<name>_tasks`` and transform it."""
        knl = lp.make_kernel(
                "{[i]: 0<=i<n}",
                insn,
                target=lp.ISPCTarget(), index_dtype=INDEX_DTYPE,
                name="stream_"+name+"_tasks")

        return transform(knl, var_names, STREAM_DTYPE)

    init_knl = make_knl("init", """
                a[i] = 1
                b[i] = 2
                c[i] = 0
                """, "a,b,c")
    triad_knl = make_knl("triad", """
            a[i] = b[i] + scalar * c[i]
            """, "a,b,c,scalar")

    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(triad_knl)
        print(ispc_code)

        build_ispc_shared_lib(
                tmpdir,
                [("stream.ispc", ispc_code)],
                [("tasksys.cpp", tasksys_source)],
                cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
                ispc_options=([
                    #"-g", "--no-omit-frame-pointer",
                    "--target=avx2-i32x8",
                    "--opt=force-aligned-memory",
                    "--opt=disable-loop-unroll",
                    #"--opt=fast-math",
                    #"--opt=disable-fma",
                    ]
                    # 64-bit indices need 64-bit addressing in ISPC.
                    + (["--addressing=64"] if INDEX_DTYPE == np.int64 else [])
                    ),
                #ispc_bin="/home/andreask/pack/ispc-v1.9.0-linux/ispc",
                quiet=False,
                )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5

        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        print(
                hex(address_from_numpy(a)),
                hex(address_from_numpy(b)),
                hex(address_from_numpy(c)))
        # The kernels are built with --opt=force-aligned-memory, so make sure
        # the buffers really start on an ALIGN_TO boundary.
        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        knl_lib.stream_init_tasks(
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                INDEX_CTYPE(ARRAY_SIZE),
                )

        def call_kernel():
            """Run one pass of the triad kernel over a, b and c."""
            knl_lib.stream_triad_tasks(
                    cptr_from_numpy(a),
                    cptr_from_numpy(b),
                    cptr_from_numpy(c),
                    STREAM_CTYPE(scalar),
                    INDEX_CTYPE(ARRAY_SIZE),
                    )

        # Two untimed warm-up calls.
        call_kernel()
        call_kernel()

        start_time = time()

        for _ in range(NRUNS):
            call_kernel()

        elapsed = time() - start_time

        print(elapsed/NRUNS)

        # Each run touches three arrays (a is written, b and c are read).
        print(1e-9*3*a.nbytes*NRUNS/elapsed, "GB/s")

        # The triad computes a = b + scalar*c, so the residual to test is
        # a - (b + scalar*c).  (The previous a-b+scalar*c only passed because
        # the init kernel sets c to 0.)
        residual = a - (b + scalar*c)
        assert la.norm(residual, np.inf) < np.finfo(STREAM_DTYPE).eps * 10
Example #5
0
def main():
    """Compile the STREAM init/triad ISPC kernels, then time and check triad.

    ``tasksys.cpp`` is read from the current working directory.  ISPC code
    for ``stream_init_tasks`` and ``stream_triad_tasks`` is generated,
    printed, and built into ``shared.so`` in a temporary directory, which is
    then loaded through :mod:`ctypes`.

    Three arrays of ``ARRAY_SIZE`` entries are allocated, checked for
    ``ALIGN_TO`` alignment and filled by the init kernel.  The triad kernel
    is called twice untimed and then ``NRUNS`` times under a timer; the mean
    time per run and the bandwidth in GB/s are printed.  Finally the result
    is checked against ``b + scalar*c``.

    Raises:
        OSError: if ``tasksys.cpp`` cannot be opened.
        AssertionError: if an array is misaligned or the triad result is
            wrong.
    """
    with open("tasksys.cpp", "r") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, var_names):
        """Create the ISPC kernel ``stream_<name>_tasks`` and transform it."""
        knl = lp.make_kernel(
                "{[i]: 0<=i<n}",
                insn,
                target=lp.ISPCTarget(), index_dtype=INDEX_DTYPE,
                name="stream_"+name+"_tasks")

        return transform(knl, var_names, STREAM_DTYPE)

    init_knl = make_knl("init", """
                a[i] = 1
                b[i] = 2
                c[i] = 0
                """, "a,b,c")
    triad_knl = make_knl("triad", """
            a[i] = b[i] + scalar * c[i]
            """, "a,b,c,scalar")

    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(triad_knl)
        print(ispc_code)

        build_ispc_shared_lib(
                tmpdir,
                [("stream.ispc", ispc_code)],
                [("tasksys.cpp", tasksys_source)],
                cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
                ispc_options=([
                    #"-g", "--no-omit-frame-pointer",
                    "--target=avx2-i32x8",
                    "--opt=force-aligned-memory",
                    "--opt=disable-loop-unroll",
                    #"--opt=fast-math",
                    #"--opt=disable-fma",
                    ]
                    # 64-bit indices need 64-bit addressing in ISPC.
                    + (["--addressing=64"] if INDEX_DTYPE == np.int64 else [])
                    ),
                #ispc_bin="/home/andreask/pack/ispc-v1.9.0-linux/ispc",
                quiet=False,
                )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5

        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        print(
                hex(address_from_numpy(a)),
                hex(address_from_numpy(b)),
                hex(address_from_numpy(c)))
        # The kernels are built with --opt=force-aligned-memory, so the
        # buffers must start on an ALIGN_TO boundary.
        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        knl_lib.stream_init_tasks(
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                INDEX_CTYPE(ARRAY_SIZE),
                )

        def call_kernel():
            """Run one pass of the triad kernel over a, b and c."""
            knl_lib.stream_triad_tasks(
                    cptr_from_numpy(a),
                    cptr_from_numpy(b),
                    cptr_from_numpy(c),
                    STREAM_CTYPE(scalar),
                    INDEX_CTYPE(ARRAY_SIZE),
                    )

        # Two untimed warm-up calls before measuring.
        call_kernel()
        call_kernel()

        start_time = time()

        for _ in range(NRUNS):
            call_kernel()

        elapsed = time() - start_time

        print(elapsed/NRUNS)

        # Three arrays are touched per run: a is written, b and c are read.
        print(1e-9*3*a.nbytes*NRUNS/elapsed, "GB/s")

        # Verify a == b + scalar*c.  The earlier expression a-b+scalar*c had
        # the wrong sign on the scalar*c term and only passed because the
        # init kernel zeroes c.
        expected = b + scalar*c
        assert la.norm(a - expected, np.inf) < np.finfo(STREAM_DTYPE).eps * 10
Example #6
0
 def call_kernel():
     """Call ``knl_lib.eval`` once with the arrays matching ``approx``.

     When ``'map'`` is among ``approx.optimizations`` the kernel receives
     seven ``approx`` arrays (``mid``, ``left``, ``right``, ``interval_a``,
     ``interval_b``, ``coeff``, ``map``); otherwise it receives only
     ``approx.tree_1d``.  In both cases ``x``, ``y`` and ``size`` follow.
     """
     if 'map' in approx.optimizations:
         tables = (
             approx.mid,
             approx.left,
             approx.right,
             approx.interval_a,
             approx.interval_b,
             approx.coeff,
             approx.map,
         )
     else:
         tables = (approx.tree_1d,)

     evaluate = knl_lib.eval
     pointers = [cptr_from_numpy(array) for array in (*tables, x, y)]
     evaluate(*pointers, INDEX_CTYPE(size))
def main(experiment,
         ispc_bin="/home/ubuntu-boot/Desktop/ispc-v1.9.1-linux/ispc"):
    """Build and time one ISPC STREAM kernel, either "triad" or "copy".

    Reads ``tests/tasksys.cpp``, generates ISPC code for an init kernel plus
    the selected benchmark kernel, compiles both into ``shared.so`` inside a
    temporary directory and loads it through :mod:`ctypes`.  The benchmark
    kernel is called seven times untimed (four direct calls, three through
    ``call_kernel``) and then ``NRUNS`` times under a timer.  The average
    time per run and the bandwidth are printed.

    Args:
        experiment: ``"triad"`` (``a = b + scalar*c``) or ``"copy"``
            (``a = b``).
        ispc_bin: Path of the ``ispc`` executable handed to
            ``build_ispc_shared_lib``.  Defaults to the previously
            hard-coded location.

    Raises:
        ValueError: if ``experiment`` is neither ``"triad"`` nor ``"copy"``.
        OSError: if ``tests/tasksys.cpp`` cannot be opened.
        AssertionError: if an array is not aligned to ``ALIGN_TO``.
    """
    # Fail early with a clear message.  Previously an unknown name built the
    # copy kernel and then crashed with a NameError on ``triad_knl``.
    if experiment not in ("triad", "copy"):
        raise ValueError(
            "experiment must be 'triad' or 'copy', got %r" % (experiment,))
    is_triad = experiment == "triad"

    print()
    print("Task: ", experiment)
    with open("tests/tasksys.cpp", "r") as ts_file:
        tasksys_source = ts_file.read()

    def make_knl(name, insn, var_names):
        """Create the ISPC kernel ``stream_<name>_tasks`` and transform it."""
        knl = lp.make_kernel("{[i]: 0<=i<n}",
                             insn,
                             target=lp.ISPCTarget(),
                             index_dtype=INDEX_DTYPE,
                             name="stream_" + name + "_tasks")

        return transform(knl, var_names, STREAM_DTYPE)

    if is_triad:
        init_knl = make_knl(
            "init", """
                    a[i] = 1
                    b[i] = 3
                    c[i] = 0
                    scalar = 7
                    """, "a,b,c,scalar")
        bench_knl = make_knl(
            "triad", """
                a[i] = b[i] + scalar * c[i]
                """, "a,b,c,scalar")
    else:
        init_knl = make_knl(
            "init", """
                    a[i] = 1
                    b[i] = 9
                    """, "a,b")
        bench_knl = make_knl("copy", """
            a[i] = b[i]
            """, "a,b")

    with TemporaryDirectory() as tmpdir:
        ispc_code = gen_code(init_knl) + gen_code(bench_knl)
        print(ispc_code)

        build_ispc_shared_lib(
            tmpdir,
            [("stream.ispc", ispc_code)],
            [("tasksys.cpp", tasksys_source)],
            cxx_options=["-g", "-fopenmp", "-DISPC_USE_OMP"],
            ispc_options=([
                "-g",
                "-O0",
                "--no-omit-frame-pointer",
                "--target=avx2-i32x8",
                #"--opt=force-aligned-memory",
                "--opt=disable-loop-unroll",
                #"--opt=fast-math",
                "--opt=disable-fma",
                "--addressing=32",
            ]),
            ispc_bin=ispc_bin,
            quiet=True,
        )

        knl_lib = ctypes.cdll.LoadLibrary(os.path.join(tmpdir, "shared.so"))

        scalar = 5
        # NOTE(review): the init kernel is compiled into the library but is
        # never called in this function, so the arrays keep whatever
        # empty_aligned returned - confirm this is intended.
        a = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        b = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)
        c = empty_aligned(ARRAY_SIZE, dtype=STREAM_DTYPE, n=ALIGN_TO)

        #print(  hex(address_from_numpy(a)),
        #        hex(address_from_numpy(b)),
        #        hex(address_from_numpy(c)))
        assert address_from_numpy(a) % ALIGN_TO == 0
        assert address_from_numpy(b) % ALIGN_TO == 0
        assert address_from_numpy(c) % ALIGN_TO == 0

        # Pick the entry point and build its argument list once; the same
        # ctypes objects are reused for every call.
        if is_triad:
            g = knl_lib.stream_triad_tasks
            x = [
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                cptr_from_numpy(c),
                STREAM_CTYPE(scalar),
                INDEX_CTYPE(ARRAY_SIZE),
            ]
        else:
            g = knl_lib.stream_copy_tasks
            x = [
                cptr_from_numpy(a),
                cptr_from_numpy(b),
                INDEX_CTYPE(ARRAY_SIZE),
            ]

        # Untimed warm-up: four direct calls ...
        for _ in range(4):
            g(*x)

        def call_kernel():
            """Run the selected kernel once with the prepared arguments."""
            g(*x)

        # ... and three more through the wrapper that is being timed.
        for _ in range(3):
            call_kernel()

        start_time = time()
        for _ in range(NRUNS):
            call_kernel()
        elapsed = time() - start_time

        print("Avg Time: ", elapsed / NRUNS)
        # Arrays touched per run: three for triad (a, b, c), two for copy.
        by = 3 if is_triad else 2
        print("MB: ", 1e-9 * by * a.nbytes * NRUNS / elapsed, "GB/s")