Ejemplo n.º 1
0
def test_rpc_remote_module():
    if not tvm.module.enabled("rpc"):
        return
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n, ), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    s = tvm.create_schedule(B.op)

    def check_remote():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        temp = util.tempdir()
        ctx = remote.cpu(0)
        f = tvm.build(s, [A, B], "llvm", name="myadd")
        path_dso = temp.relpath("dev_lib.so")
        f.export_library(path_dso)
        remote.upload(path_dso)
        f1 = remote.load_module("dev_lib.so")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, remote.cpu(0), number=10)
        cost = time_f(a, b).mean
        print('%g secs/op' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    check_remote()
Ejemplo n.º 2
0
def test_rpc_simple():
    if not tvm.module.enabled("rpc"):
        return

    @tvm.register_func("rpc.test.addone")
    def addone(x):
        return x + 1

    @tvm.register_func("rpc.test.strcat")
    def strcat(name, x):
        return "%s:%d" % (name, x)

    @tvm.register_func("rpc.test.except")
    def remotethrow(name):
        raise ValueError("%s" % name)

    server = rpc.Server("localhost")
    client = rpc.connect(server.host, server.port, key="x1")
    f1 = client.get_function("rpc.test.addone")
    assert f1(10) == 11
    f3 = client.get_function("rpc.test.except")
    try:
        f3("abc")
        assert False
    except tvm.TVMError as e:
        assert "abc" in str(e)

    f2 = client.get_function("rpc.test.strcat")
    assert f2("abc", 11) == "abc:11"
Ejemplo n.º 3
0
def test_rpc_array():
    if not tvm.module.enabled("rpc"):
        return
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n, ), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    s = tvm.create_schedule(B.op)
    remote = rpc.connect(proxy_host, proxy_port, key="js")
    target = "llvm -target=asmjs-unknown-emscripten -system-lib"

    def check_remote():
        if not tvm.module.enabled(target):
            print("Skip because %s is not enabled" % target)
            return
        temp = util.tempdir()
        ctx = remote.cpu(0)
        f = tvm.build(s, [A, B], target, name="myadd")
        path_obj = temp.relpath("dev_lib.bc")
        path_dso = temp.relpath("dev_lib.js")
        f.save(path_obj)
        emscripten.create_js(path_dso, path_obj, side_module=True)
        # Upload to suffix as dso so it can be loaded remotely
        remote.upload(path_dso, "dev_lib.dso")
        data = remote.download("dev_lib.dso")
        f1 = remote.load_module("dev_lib.dso")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, remote.cpu(0), number=10)
        cost = time_f(a, b).mean
        print('%g secs/op' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    check_remote()
Ejemplo n.º 4
0
def test_rpc_array():
    if not tvm.module.enabled("rpc"):
        return
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    s = tvm.create_schedule(B.op)
    remote = rpc.connect(proxy_host, proxy_port, key="js")
    target = "llvm -target=asmjs-unknown-emscripten -system-lib"
    def check_remote():
        if not tvm.module.enabled(target):
            print("Skip because %s is not enabled" % target)
            return
        temp = util.tempdir()
        ctx = remote.cpu(0)
        f = tvm.build(s, [A, B], target, name="myadd")
        path_obj = temp.relpath("dev_lib.bc")
        path_dso = temp.relpath("dev_lib.js")
        f.save(path_obj)
        emscripten.create_js(path_dso, path_obj, side_module=True)
        # Upload to suffix as dso so it can be loaded remotely
        remote.upload(path_dso, "dev_lib.dso")
        data = remote.download("dev_lib.dso")
        f1 = remote.load_module("dev_lib.dso")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, remote.cpu(0), number=10)
        cost = time_f(a, b).mean
        print('%g secs/op' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    check_remote()
    def build_arm():
        target = "llvm -target=armv7-none-linux-gnueabihf"
        if not tvm.module.enabled(target):
            print("Skip because %s is not enabled.." % target)
            return
        temp = util.tempdir()
        f = tvm.build(s, [A, B, C], target)
        path = temp.relpath("myadd.o")
        f.save(path)
        verify_elf(path, 0x28)
        asm_path = temp.relpath("myadd.asm")
        f.save(asm_path)
        # Do a RPC verification, launch kernel on Arm Board if available.
        host = os.environ.get('TVM_RPC_ARM_HOST', None)
        remote = None
        if host:
            port = int(os.environ['TVM_RPC_ARM_PORT'])
            try:
                remote = rpc.connect(host, port)
            except tvm.TVMError as e:
                pass

        if remote:
            remote.upload(path)
            farm = remote.load_module("myadd.o")
            ctx = remote.cpu(0)
            n = nn
            a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
            b = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
            c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
            farm(a, b, c)
            np.testing.assert_allclose(
                c.asnumpy(), a.asnumpy() + b.asnumpy())
            print("Verification finish on remote..")
Ejemplo n.º 6
0
def test_bigendian_rpc():
    """Test big endian rpc when there is a PowerPC RPC server available"""
    host = os.environ.get("TVM_POWERPC_TEST_HOST", None)
    port = os.environ.get("TVM_POWERPC_TEST_PORT", 9090)
    if host is None:
        return

    def verify_rpc(remote, target, shape, dtype):
        A = tvm.placeholder(shape, dtype=dtype)
        B = tvm.compute(A.shape, lambda i: A[i] + tvm.const(1, A.dtype))
        s = tvm.create_schedule(B.op)
        f = tvm.build(s, [A, B], target, name="myadd")

        ctx = remote.cpu(0)
        a = tvm.nd.array(np.random.randint(0, 256, size=shape).astype(A.dtype),
                         ctx=ctx)
        b = tvm.nd.array(np.zeros(shape).astype(A.dtype), ctx=ctx)
        temp = util.tempdir()
        path_dso = temp.relpath("dev_lib.o")
        f.save(path_dso)
        remote.upload(path_dso)
        f = remote.load_module("dev_lib.o")
        f(a, b)
        np.testing.assert_allclose(a.asnumpy() + 1, b.asnumpy())

    print("Test RPC connection to PowerPC...")
    remote = rpc.connect(host, port)
    target = "llvm -mtriple=powerpc-linux-gnu"
    for dtype in ["float32", "float64", "int32", "int8"]:
        verify_rpc(remote, target, (10, ), dtype)
Ejemplo n.º 7
0
def test_rpc_simple():
    if not tvm.module.enabled("rpc"):
        return
    @tvm.register_func("rpc.test.addone")
    def addone(x):
        return x + 1
    @tvm.register_func("rpc.test.strcat")
    def strcat(name, x):
        return "%s:%d" % (name, x)

    @tvm.register_func("rpc.test.except")
    def remotethrow(name):
        raise ValueError("%s" % name)

    server = rpc.Server("localhost")
    client = rpc.connect(server.host, server.port, key="x1")
    f1 = client.get_function("rpc.test.addone")
    assert f1(10) == 11
    f3 = client.get_function("rpc.test.except")
    try:
        f3("abc")
        assert False
    except tvm.TVMError as e:
        assert "abc" in str(e)

    f2 = client.get_function("rpc.test.strcat")
    assert f2("abc", 11) == "abc:11"
Ejemplo n.º 8
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, required=True, choices=['resnet', 'mobilenet'],
        help="The model type.")
    parser.add_argument('--host', type=str, required=True, help="The host address of your Raspberry Pi.")
    parser.add_argument('--port', type=int, required=True, help="The port number of your Raspberry Pi.")
    parser.add_argument('--opt-level', type=int, default=1, help="Level of optimization.")
    parser.add_argument('--num-iter', type=int, default=50, help="Number of iteration during benchmark.")
    args = parser.parse_args()

    opt_level = args.opt_level

    num_iter = args.num_iter
    batch_size = 1
    num_classes = 1000
    image_shape = (3, 224, 224)

    data_shape = (batch_size,) + image_shape
    out_shape = (batch_size, num_classes)
    if args.model == 'resnet':
        net, params = nnvm.testing.resnet.get_workload(
            batch_size=1, image_shape=image_shape)
    elif args.model == 'mobilenet':
        net, params = nnvm.testing.mobilenet.get_workload(
            batch_size=1, image_shape=image_shape)
    else:
        raise ValueError('no benchmark prepared for {}.'.format(args.model))


    with nnvm.compiler.build_config(opt_level=opt_level):
        graph, lib, params = nnvm.compiler.build(
            net, tvm.target.rasp(), shape={"data": data_shape}, params=params)

    tmp = util.tempdir()
    lib_fname = tmp.relpath('net.o')
    lib.save(lib_fname)

    remote = rpc.connect(args.host, args.port)
    remote.upload(lib_fname)

    ctx = remote.cpu(0)
    rlib = remote.load_module('net.o')
    rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}

    module = runtime.create(graph, rlib, ctx)
    module.set_input('data', tvm.nd.array(np.random.uniform(size=(data_shape)).astype("float32")))
    module.set_input(**rparams)
    module.run()
    out = module.get_output(0, tvm.nd.empty(out_shape, ctx=ctx))
    out.asnumpy()

    print('benchmark args: {}'.format(args))
    ftimer = module.module.time_evaluator("run", ctx, num_iter)
    for i in range(3):
        prof_res = ftimer()
        print(prof_res)
        # sleep for avoiding cpu overheat
        time.sleep(45)
Ejemplo n.º 9
0
def test_rpc_file_exchange():
    if not tvm.module.enabled("rpc"):
        return
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    blob = bytearray(np.random.randint(0, 10, size=(10)))
    remote.upload(blob, "dat.bin")
    rev = remote.download("dat.bin")
    assert(rev == blob)
Ejemplo n.º 10
0
def test_rpc_return_func():
    @tvm.register_func("rpc.test.remote_func")
    def addone(x):
        return lambda y: x+y
    server = rpc.Server("localhost")
    client = rpc.connect(server.host, server.port, key="x1")
    f1 = client.get_function("rpc.test.remote_func")
    fadd = f1(10)
    assert fadd(12) == 22
Ejemplo n.º 11
0
def try_remote_save_load():
    if not tvm.module.enabled("rpc"):
        return
    if not tvm.module.enabled("opengl"):
        return
    if not tvm.module.enabled("llvm"):
        return

    # Build the module.
    n = tvm.var("n")
    A = tvm.placeholder((n, ), name='A')
    B = tvm.placeholder((n, ), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule(C.op)
    s[C].opengl()
    target_host = "llvm -target=asmjs-unknown-emscripten -system-lib"
    f = tvm.build(s, [A, B, C],
                  "opengl",
                  target_host=target_host,
                  name="myadd")

    remote = rpc.connect(proxy_host, proxy_port, key="js")

    temp = util.tempdir()
    ctx = remote.opengl(0)
    path_obj = temp.relpath("myadd.bc")
    path_dso = temp.relpath("myadd.js")
    path_gl = temp.relpath("myadd.gl")
    path_json = temp.relpath("myadd.tvm_meta.json")

    f.save(path_obj)
    emscripten.create_js(path_dso, path_obj, side_module=True)
    f.imported_modules[0].save(path_gl)

    remote.upload(path_dso, "myadd.dso")
    remote.upload(path_gl)
    remote.upload(path_json)

    remote.download("myadd.dso")
    remote.download("myadd.gl")
    remote.download("myadd.tvm_meta.json")

    print('Loading myadd.dso')
    fhost = remote.load_module("myadd.dso")

    print('Loading myadd.gl')
    fdev = remote.load_module("myadd.gl")

    print('import_module')
    fhost.import_module(fdev)

    print('running...')
    a = tvm.nd.array(np.random.uniform(size=16).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(16, dtype=A.dtype), ctx)
    c = tvm.nd.array(np.zeros(16, dtype=C.dtype), ctx)
    fhost(a, b, c)
    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
Ejemplo n.º 12
0
def test_rpc_file_exchange():
    if not tvm.module.enabled("rpc"):
        return
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    blob = bytearray(np.random.randint(0, 10, size=(10)))
    remote.upload(blob, "dat.bin")
    rev = remote.download("dat.bin")
    assert (rev == blob)
Ejemplo n.º 13
0
def test_rpc_module():
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n, ), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    temp = util.tempdir()
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
    # Build the dynamic lib.
    # If we don't want to do metal and only use cpu, just set target to be target
    f = tvm.build(s, [A, B], "metal", target_host=target, name="myadd")
    path_dso1 = temp.relpath("dev_lib.dylib")
    f.export_library(path_dso1, xcode.create_dylib, arch=arch, sdk=sdk)
    xcode.codesign(path_dso1)

    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].parallel(xi)
    s[B].pragma(xo, "parallel_launch_point")
    s[B].pragma(xi, "parallel_barrier_when_finish")
    f = tvm.build(s, [A, B], target, name="myadd_cpu")
    path_dso2 = temp.relpath("cpu_lib.dylib")
    f.export_library(path_dso2, xcode.create_dylib, arch=arch, sdk=sdk)
    xcode.codesign(path_dso2)

    # Start RPC test server that contains the compiled library.
    server = xcode.popen_test_rpc(proxy_host,
                                  proxy_port,
                                  key,
                                  destination=destination,
                                  options=['-quiet'],
                                  libs=[path_dso1, path_dso2])

    # connect to the proxy
    remote = rpc.connect(proxy_host, proxy_port, key=key)
    ctx = remote.metal(0)
    f1 = remote.load_module("dev_lib.dylib")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    # CPU
    ctx = remote.cpu(0)
    f2 = remote.load_module("cpu_lib.dylib")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f2.time_evaluator(f1.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
Ejemplo n.º 14
0
def try_remote_save_load():
    if not tvm.module.enabled("rpc"):
        return
    if not tvm.module.enabled("opengl"):
        return
    if not tvm.module.enabled("llvm"):
        return

    # Build the module.
    n = tvm.var("n")
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule(C.op)
    s[C].opengl()
    target_host = "llvm -target=asmjs-unknown-emscripten -system-lib"
    f = tvm.build(s, [A, B, C], "opengl", target_host=target_host, name="myadd")

    remote = rpc.connect(proxy_host, proxy_port, key="js")

    temp = util.tempdir()
    ctx = remote.opengl(0)
    path_obj = temp.relpath("myadd.bc")
    path_dso = temp.relpath("myadd.js")
    path_gl = temp.relpath("myadd.gl")
    path_json = temp.relpath("myadd.tvm_meta.json")

    f.save(path_obj)
    emscripten.create_js(path_dso, path_obj, side_module=True)
    f.imported_modules[0].save(path_gl)

    remote.upload(path_dso, "myadd.dso")
    remote.upload(path_gl)
    remote.upload(path_json)

    remote.download("myadd.dso")
    remote.download("myadd.gl")
    remote.download("myadd.tvm_meta.json")

    print('Loading myadd.dso')
    fhost = remote.load_module("myadd.dso")

    print('Loading myadd.gl')
    fdev = remote.load_module("myadd.gl")

    print('import_module')
    fhost.import_module(fdev)

    print('running...')
    a = tvm.nd.array(np.random.uniform(size=16).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(16, dtype=A.dtype), ctx)
    c = tvm.nd.array(np.zeros(16, dtype=C.dtype), ctx)
    fhost(a, b, c)
    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
Ejemplo n.º 15
0
def test_rpc_module():
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n, ), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    temp = util.tempdir()
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
    # Build the dynamic lib.
    # If we don't want to do metal and only use cpu, just set target to be target
    f = tvm.build(s, [A, B], "opencl", target_host=target, name="myadd")
    path_dso1 = temp.relpath("dev_lib.so")
    f.export_library(path_dso1, ndk.create_shared)

    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].parallel(xi)
    s[B].pragma(xo, "parallel_launch_point")
    s[B].pragma(xi, "parallel_barrier_when_finish")
    f = tvm.build(s, [A, B], target, name="myadd_cpu")
    path_dso2 = temp.relpath("cpu_lib.so")
    f.export_library(path_dso2, ndk.create_shared)

    # connect to the proxy
    remote = rpc.connect(proxy_host, proxy_port, key=key)

    print('Run GPU test ...')
    ctx = remote.cl(0)
    remote.upload(path_dso1)
    f1 = remote.load_module("dev_lib.so")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    print('Run CPU test ...')
    ctx = remote.cpu(0)
    remote.upload(path_dso2)
    f2 = remote.load_module("cpu_lib.so")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f2.time_evaluator(f2.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
Ejemplo n.º 16
0
def test_rpc_array():
    if not tvm.module.enabled("rpc"):
        return
    x = np.random.randint(0, 10, size=(3, 4))
    @tvm.register_func("rpc.test.remote_array_func")
    def remote_array_func(y):
        np.testing.assert_equal(y.asnumpy(), x)
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    r_cpu = tvm.nd.array(x, remote.cpu(0))
    assert str(r_cpu.context).startswith("remote")
    np.testing.assert_equal(r_cpu.asnumpy(), x)
    fremote = remote.get_function("rpc.test.remote_array_func")
    fremote(r_cpu)
Ejemplo n.º 17
0
def test_rpc_array():
    if not tvm.module.enabled("rpc"):
        return
    x = np.random.randint(0, 10, size=(3, 4))
    @tvm.register_func("rpc.test.remote_array_func")
    def remote_array_func(y):
        np.testing.assert_equal(y.asnumpy(), x)
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    print("second connect")
    r_cpu = tvm.nd.array(x, remote.cpu(0))
    assert str(r_cpu.context).startswith("remote")
    np.testing.assert_equal(r_cpu.asnumpy(), x)
    fremote = remote.get_function("rpc.test.remote_array_func")
    fremote(r_cpu)
Ejemplo n.º 18
0
def mobileNet_rpc_module(dtype):
    # load model
    net, params = nnvm.testing.mobilenet.get_workload(batch_size=1,
                                                      image_shape=image_shape,
                                                      dtype=dtype)

    # compile
    opt_level = 2 if dtype == 'float32' else 1
    with nnvm.compiler.build_config(opt_level=opt_level):
        graph, lib, params = nnvm.compiler.build(net,
                                                 tvm.target.mali(),
                                                 shape={"data": data_shape},
                                                 params=params,
                                                 dtype=dtype,
                                                 target_host=target)

    # upload model to remote device
    tmp = util.tempdir()
    lib_fname = tmp.relpath('net.so')
    lib.export_library(lib_fname, ndk.create_shared)

    remote = rpc.connect(proxy_host, proxy_port, key=key)
    remote.upload(lib_fname)
    ctx = remote.cl(0)
    rlib = remote.load_module('net.so')
    rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}

    print('Run GPU test ...')
    # create graph runtime
    module = runtime.create(graph, rlib, ctx)
    module.set_input(
        'data',
        tvm.nd.array(np.random.uniform(size=(data_shape)).astype(dtype)))
    module.set_input(**rparams)

    # the num of runs for warm up and test
    num_warmup = 50
    num_test = 300
    warm_up_timer = module.module.time_evaluator("run", ctx, num_warmup)
    warm_up_timer()
    ftimer = module.module.time_evaluator("run", ctx, num_test)
    prof_res = ftimer()
    print("backend: TVM-mali\tmodel: %s\tdtype: %s\tcost:%.4f" %
          ("mobileNet", dtype, prof_res.mean))
Ejemplo n.º 19
0
 def check_remote():
     if not tvm.module.enabled("llvm"):
         print("Skip because llvm is not enabled")
         return
     mlib = tvm.build(s, [A, B], "llvm", name="myadd")
     server = rpc.Server("localhost")
     remote = rpc.connect(server.host, server.port)
     temp = util.tempdir()
     ctx = remote.cpu(0)
     path_dso = temp.relpath("dev_lib.so")
     mlib.export_library(path_dso)
     remote.upload(path_dso)
     mlib = remote.load_module("dev_lib.so")
     mod = graph_runtime.create(graph, mlib, remote.cpu(0))
     a = np.random.uniform(size=(n,)).astype(A.dtype)
     mod.run(x=tvm.nd.array(a, ctx))
     out = tvm.nd.empty((n,), ctx=ctx)
     out = mod.get_output(0, out)
     np.testing.assert_equal(out.asnumpy(), a + 1)
Ejemplo n.º 20
0
 def check_remote():
     if not tvm.module.enabled("llvm"):
         print("Skip because llvm is not enabled")
         return
     mlib = tvm.build(s, [A, B], "llvm", name="myadd")
     server = rpc.Server("localhost")
     remote = rpc.connect(server.host, server.port)
     temp = util.tempdir()
     ctx = remote.cpu(0)
     path_dso = temp.relpath("dev_lib.so")
     mlib.export_library(path_dso)
     remote.upload(path_dso)
     mlib = remote.load_module("dev_lib.so")
     mod = graph_runtime.create(graph, mlib, remote.cpu(0))
     a = np.random.uniform(size=(n, )).astype(A.dtype)
     mod.run(x=tvm.nd.array(a, ctx))
     out = tvm.nd.empty((n, ), ctx=ctx)
     out = mod.get_output(0, out)
     np.testing.assert_equal(out.asnumpy(), a + 1)
Ejemplo n.º 21
0
 def check():
     if not tvm.module.enabled("rpc"):
         return
     @tvm.register_func("rpc.test2.addone")
     def addone(x):
         return x + 1
     @tvm.register_func("rpc.test2.strcat")
     def addone(name, x):
         return "%s:%d" % (name, x)
     server = multiprocessing.Process(
         target=rpc_proxy.websocket_proxy_server,
         args=("ws://localhost:%d/ws" % web_port,"x1"))
     # Need to make sure that the connection start after proxy comes up
     time.sleep(0.1)
     server.deamon = True
     server.start()
     client = rpc.connect(prox.host, prox.port, key="x1")
     f1 = client.get_function("rpc.test2.addone")
     assert f1(10) == 11
     f2 = client.get_function("rpc.test2.strcat")
     assert f2("abc", 11) == "abc:11"
Ejemplo n.º 22
0
def test_rpc_executor():
    host = "localhost"
    port = 9100
    server = rpc.Server(host, port, use_popen=True)

    x = sym.Variable("x")
    y = sym.Variable("y")
    z = sym.exp(y + x)
    shape = (10, 128)
    dtype = tvm.float32
    shape_dict = {"x": shape, "y": shape}
    tmp = util.tempdir()
    lib_name  = tmp.relpath("net.o")

    graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
    # save module
    lib.save(lib_name)
    remote = rpc.connect(host, port)
    remote.upload(lib_name)
    ctx = remote.cpu(0)
    # load remote
    rlib = remote.load_module("net.o")

    # Create remotemodule
    m = graph_runtime.create(graph, rlib, remote.cpu(0))
    # get member functions
    set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
    na = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
    nb = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
    # set inputs
    set_input("x", na)
    set_input("y", nb)
    # execute
    run()
    # get outputs
    out = tvm.nd.empty(shape, dtype, ctx)
    get_output(0, out)
    np.testing.assert_allclose(
        out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
    server.terminate()
Ejemplo n.º 23
0
def test_rpc_executor():
    host = "localhost"
    port = 9091
    server = rpc.Server(host, port)

    x = sym.Variable("x")
    y = sym.Variable("y")
    z = sym.exp(y + x)
    shape = (10, 128)
    dtype = tvm.float32
    shape_dict = {"x": shape, "y": shape}
    tmp = util.tempdir()
    lib_name = tmp.relpath("net.o")

    graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
    # save module
    lib.save(lib_name)
    remote = rpc.connect(host, port)
    remote.upload(lib_name)
    ctx = remote.cpu(0)
    # load remote
    rlib = remote.load_module("net.o")

    # Create remotemodule
    m = graph_runtime.create(graph, rlib, remote.cpu(0))
    # get member functions
    set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
    na = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
    nb = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
    # set inputs
    set_input("x", na)
    set_input("y", nb)
    # execute
    run()
    # get outputs
    out = tvm.nd.empty(shape, dtype, ctx)
    get_output(0, out)
    np.testing.assert_allclose(out.asnumpy(),
                               np.exp(na.asnumpy() + nb.asnumpy()))
    server.terminate()
Ejemplo n.º 24
0
Archivo: peak.py Proyecto: zheng-da/tvm
def measure_peak_all(target, target_host, host, port):
    """measure memory bandwidth and peak compute for gpu devices

    Parameters
    ----------
    target: str or :any:`tvm.target.Target`
    target_host: str
    host: str
    port: int
    """

    target = tvm.target.create(target)
    remote = rpc.connect(host, port)
    n_times = 20

    bandwidth_total_item = 1 << 25
    bandwidth_item_per_thread = 32

    compute_total_item = 1 << 21
    compute_item_per_thread = 4096

    if str(target).startswith("opencl"):
        ctx = remote.cl()
    elif str(target).startswith("cuda"):
        ctx = remote.gpu()
    elif str(target).startswith("metal"):
        ctx = remote.metal()
    else:
        raise RuntimeError("Unsupported target")

    logging.info("========== measure memory bandwidth ==========")
    measure_bandwidth_all_types(bandwidth_total_item, bandwidth_item_per_thread,
                                n_times, target, target_host, remote, ctx)

    logging.info("========== measure peak compute ==========")
    measure_compute_all_types(compute_total_item, compute_item_per_thread,
                              n_times, target, target_host, remote, ctx)
Ejemplo n.º 25
0
        def check():
            if not tvm.module.enabled("rpc"):
                return

            @tvm.register_func("rpc.test2.addone")
            def addone(x):
                return x + 1

            @tvm.register_func("rpc.test2.strcat")
            def addone(name, x):
                return "%s:%d" % (name, x)

            server = multiprocessing.Process(
                target=proxy.websocket_proxy_server,
                args=("ws://localhost:%d/ws" % web_port, "x1"))
            # Need to make sure that the connection start after proxy comes up
            time.sleep(0.1)
            server.deamon = True
            server.start()
            client = rpc.connect(prox.host, prox.port, key="x1")
            f1 = client.get_function("rpc.test2.addone")
            assert f1(10) == 11
            f2 = client.get_function("rpc.test2.strcat")
            assert f2("abc", 11) == "abc:11"
Ejemplo n.º 26
0
def test_bigendian_rpc_param():
    """Test big endian rpc when there is a PowerPC RPC server available"""
    host = os.environ.get("TVM_POWERPC_TEST_HOST", None)
    port = os.environ.get("TVM_POWERPC_TEST_PORT", 9090)
    if host is None:
        return

    def verify_nnvm(remote, target, shape, dtype):
        x = nnvm.sym.Variable("x")
        y = x + 1
        graph, lib, _ = nnvm.compiler.build(y,
                                            target,
                                            shape={"x": shape},
                                            dtype={"x": dtype})

        temp = util.tempdir()
        path_dso = temp.relpath("dev_lib.o")
        lib.save(path_dso)
        remote.upload(path_dso)
        lib = remote.load_module("dev_lib.o")
        a = np.random.randint(0, 256, size=shape).astype(dtype)
        a[:] = 1
        params = {"x": a}
        ctx = remote.cpu(0)
        m = graph_runtime.create(graph, lib, ctx)
        # uses save param_dict
        m.load_params(nnvm.compiler.save_param_dict(params))
        m.run()
        out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
        np.testing.assert_allclose(a + 1, out.asnumpy())

    print("Test RPC connection to PowerPC...")
    remote = rpc.connect(host, port)
    target = "llvm -mtriple=powerpc-linux-gnu"
    for dtype in ["float32", "float64", "int32", "int8"]:
        verify_nnvm(remote, target, (10, ), dtype)
Ejemplo n.º 27
0
from tvm.contrib import rpc

REMOTE = rpc.connect("0.0.0.0", 9090, key="android")
Ejemplo n.º 28
0
input_name = 'input_0'
data_shape = img.shape
out_shape = (1,125,n//32,n//32)

# GET model from frameworks
# change xyz to supported framework name.
sym, params = nnvm.frontend.from_onnx(onnx_graph)
#print(sym.debug_str()

# connect to the proxy
# Set to be address of tvm proxy.
proxy_host = os.environ["TVM_ANDROID_RPC_PROXY_HOST"]
proxy_port = 9090
key = "android"
print('RPC Connecting...')
remote = rpc.connect(proxy_host, proxy_port, key=key)
print('RPC Connected')

arch = "arm64"
if exec_gpu:
    # Mobile GPU
    target = 'opencl'
    target_host = "llvm -target=%s-linux-android" % arch
    ctx = remote.cl(0)
else:
    # Mobile CPU
    target = "llvm -target=%s-linux-android" % arch
    target_host = None
    ctx = remote.cpu(0)

print('Build Graph...')
Ejemplo n.º 29
0
def test_gemm_gpu(N, times, bn, num_block, num_thread):
    assert (bn <= N)
    assert (num_thread * num_thread * 16 <= N)
    assert (num_block * num_block * 2 <= N)
    A = tvm.placeholder((N, N), name='A')
    B = tvm.placeholder((N, N), name='Btmp')
    k = tvm.reduce_axis((0, N), name='k')

    packedB = tvm.compute((N, N / bn, bn),
                          lambda x, y, z: B[x, y * bn + z],
                          name='B')

    C = tvm.compute((N, N),
                    lambda ii, jj: tvm.sum(
                        A[ii, k] * packedB[k, jj / bn, jj % bn], axis=k),
                    name='C')

    s = tvm.create_schedule(C.op)
    CC = s.cache_write(C, "local")

    block_x = tvm.thread_axis("blockIdx.x")
    block_y = tvm.thread_axis("blockIdx.y")
    thread_x = tvm.thread_axis("threadIdx.x")
    thread_y = tvm.thread_axis("threadIdx.y")

    thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx")
    thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy")

    pby, pbi = s[packedB].split(packedB.op.axis[0], nparts=num_thread)
    pbx, pbj = s[packedB].split(packedB.op.axis[1], nparts=num_thread)
    s[packedB].bind(pby, thread_y)
    s[packedB].bind(pbx, thread_x)
    pbz, pbk = s[packedB].split(packedB.op.axis[2], factor=8)
    s[packedB].vectorize(pbk)

    by, yi = s[C].split(C.op.axis[0], nparts=num_block)
    bx, xi = s[C].split(C.op.axis[1], nparts=num_thread)

    s[C].bind(by, block_y)
    s[C].bind(bx, thread_y)
    s[C].reorder(by, bx, yi, xi)

    tyz, yi = s[C].split(yi, nparts=2)
    ty, yi = s[C].split(yi, nparts=num_block)
    txz, xi = s[C].split(xi, nparts=2)
    tx, xi = s[C].split(xi, nparts=num_thread)

    s[C].reorder(tyz, txz, ty, tx, yi, xi)
    s[C].bind(tyz, thread_yz)
    s[C].bind(txz, thread_xz)

    s[C].bind(ty, block_x)
    s[C].bind(tx, thread_x)

    xyi, xxi = s[C].split(xi, factor=8)
    s[C].reorder(tyz, txz, ty, tx, yi, xyi, xxi)
    s[C].vectorize(xxi)

    s[CC].compute_at(s[C], yi)
    yo, xo = CC.op.axis
    s[CC].reorder(k, yo, xo)
    xo, xi = s[CC].split(xo, factor=8)
    s[CC].vectorize(xi)

    ko, ki = s[CC].split(k, factor=2)
    s[CC].unroll(ki)

    print(tvm.lower(s, [A, B, C], simple_mode=True))

    f = tvm.build(s, [A, B, C], "opencl", target_host=target, name="gemm_gpu")
    temp = util.tempdir()
    path_dso = temp.relpath("gemm_gpu.so")
    f.export_library(path_dso, ndk.create_shared)

    # connect to the proxy
    remote = rpc.connect(proxy_host, proxy_port, key=key)
    ctx = remote.cl(0)
    remote.upload(path_dso)
    f = remote.load_module("gemm_gpu.so")

    evaluate(f, ctx, N, times)
Ejemplo n.º 30
0
# library and the new parameter, since we do some optimization that will
# change the parameters but keep the result of model as the same.

# Save the library at local temporary directory.
tmp = util.tempdir()
lib_fname = tmp.relpath('net.tar')
lib.export_library(lib_fname)

######################################################################
# Deploy the Model Remotely by RPC
# --------------------------------
# With RPC, you can deploy the model remotely from your host machine
# to the remote device.

# connect the server
remote = rpc.connect(host, port)

# upload the library to remote device and load it
remote.upload(lib_fname)
rlib = remote.load_module('net.tar')

ctx = remote.cl(0)
# upload the parameter
rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}

# create the remote runtime module
module = runtime.create(graph, rlib, ctx)
# set parameter
module.set_input(**rparams)
# set input data
module.set_input('data', tvm.nd.array(x.astype('float32')))
Ejemplo n.º 31
0
graph, lib, params = nnvm.compiler.build(sym,
                                         target,
                                         shape_dict,
                                         params=params,
                                         target_host=target_host)

# Save the library at local temporary directory.
tmp = util.tempdir()
path_o = tmp.relpath('sym.o')
path_cl = tmp.relpath('sym.cl')
path_json = tmp.relpath('sym.tvm_meta.json')
lib.save(path_o)
lib.imported_modules[0].save(path_cl)

# connect the server
remote = rpc.connect('192.168.1.14', 9090)

# upload the library to remote device and load it
remote.upload(path_o)
remote.upload(path_cl)
remote.upload(path_json)
fhost = remote.load_module('sym.o')
fdev = remote.load_module('sym.cl')
fhost.import_module(fdev)

from tvm.contrib import graph_runtime

ctx = remote.cl(0)
# upload the parameter
rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}
dtype = 'float32'
def run_case(model, dtype):
    # load model
    if model == 'vgg16':
        net, params = nnvm.testing.vgg.get_workload(num_layers=16,
                                                    batch_size=1,
                                                    image_shape=image_shape,
                                                    dtype=dtype)
    elif model == 'resnet18':
        net, params = nnvm.testing.resnet.get_workload(num_layers=18,
                                                       batch_size=1,
                                                       image_shape=image_shape,
                                                       dtype=dtype)
    elif model == 'mobilenet':
        net, params = nnvm.testing.mobilenet.get_workload(
            batch_size=1, image_shape=image_shape, dtype=dtype)
    else:
        raise ValueError('no benchmark prepared for {}.'.format(model))

    # compile
    opt_level = 2 if dtype == 'float32' else 1
    with nnvm.compiler.build_config(opt_level=opt_level):
        graph, lib, params = nnvm.compiler.build(net,
                                                 tvm.target.mali(),
                                                 shape={"data": data_shape},
                                                 params=params,
                                                 dtype=dtype,
                                                 target_host=args.target_host)

    # upload model to remote device
    tmp = util.tempdir()
    lib_fname = tmp.relpath('net.tar')
    lib.export_library(lib_fname)

    if args.host is not None:
        remote = rpc.connect(args.host, args.port)
        remote.upload(lib_fname)

        ctx = remote.cl(0)
        rlib = remote.load_module('net.tar')
        rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}
    else:
        ctx = tvm.cl(0)
        rlib = lib
        rparams = params

    # create graph runtime
    module = runtime.create(graph, rlib, ctx)
    module.set_input(
        'data',
        tvm.nd.array(np.random.uniform(size=(data_shape)).astype(dtype)))
    module.set_input(**rparams)

    # benchmark
    # print("============================================================")
    # print("model: %s, dtype: %s" % (model, dtype))

    # the num of runs for warm up and test
    num_warmup = 10
    num_test = 60
    if model == 'mobilenet':  # mobilenet is fast, need more runs for stable measureament
        num_warmup *= 5
        num_test *= 5

    # perform some warm up runs
    # print("warm up..")
    warm_up_timer = module.module.time_evaluator("run", ctx, num_warmup)
    warm_up_timer()

    # test
    # print("test..")
    ftimer = module.module.time_evaluator("run", ctx, num_test)
    prof_res = ftimer()
    # print("cost per image: %.4fs" % prof_res.mean)

    print("backend: TVM-mali\tmodel: %s\tdtype: %s\tcost:%.4f" %
          (model, dtype, prof_res.mean))
Ejemplo n.º 33
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        type=str,
                        required=True,
                        choices=['resnet', 'mobilenet'],
                        help="The model type.")
    parser.add_argument('--host',
                        type=str,
                        required=True,
                        help="The host address of your Raspberry Pi.")
    parser.add_argument('--port',
                        type=int,
                        required=True,
                        help="The port number of your Raspberry Pi.")
    parser.add_argument('--opt-level',
                        type=int,
                        default=1,
                        help="Level of optimization.")
    parser.add_argument('--num-iter',
                        type=int,
                        default=50,
                        help="Number of iteration during benchmark.")
    args = parser.parse_args()

    opt_level = args.opt_level
    target = "llvm -target=armv7l-none-linux-anueabihf -mcpu=cortex-a53 -mattr=+neon"

    num_iter = args.num_iter
    batch_size = 1
    num_classes = 1000
    image_shape = (3, 224, 224)

    data_shape = (batch_size, ) + image_shape
    out_shape = (batch_size, num_classes)
    if args.model == 'resnet':
        net, params = nnvm.testing.resnet.get_workload(batch_size=1,
                                                       image_shape=image_shape)
    elif args.model == 'mobilenet':
        net, params = nnvm.testing.mobilenet.get_workload(
            batch_size=1, image_shape=image_shape)
    else:
        raise ValueError('no benchmark prepared for {}.'.format(args.model))

    with nnvm.compiler.build_config(opt_level=opt_level):
        with tvm.target.rasp():
            graph, lib, params = nnvm.compiler.build(
                net, target, shape={"data": data_shape}, params=params)

    tmp = util.tempdir()
    lib_fname = tmp.relpath('net.o')
    lib.save(lib_fname)

    remote = rpc.connect(args.host, args.port)
    remote.upload(lib_fname)

    ctx = remote.cpu(0)
    rlib = remote.load_module('net.o')
    rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}

    module = runtime.create(graph, rlib, ctx)
    module.set_input(
        'data',
        tvm.nd.array(np.random.uniform(size=(data_shape)).astype("float32")))
    module.set_input(**rparams)
    module.run()
    out = module.get_output(0, tvm.nd.empty(out_shape, ctx=ctx))
    out.asnumpy()

    print('benchmark args: {}'.format(args))
    ftimer = module.module.time_evaluator("run", ctx, num_iter)
    for i in range(3):
        prof_res = ftimer()
        print(prof_res)
        # sleep for avoiding cpu overheat
        time.sleep(45)
Ejemplo n.º 34
0
def deploy_rpc():
    """Runs the demo that deploys a model remotely through RPC.
    """
    from tvm.contrib import rpc, util, emscripten

    # As usual, load the resnet18 model.
    net, params, data_shape, out_shape = load_mxnet_resnet()

    # Compile the model.
    # Note that this time we are changing the target.
    # This is because we want to translate the host library into JavaScript
    # through Emscripten.
    graph, lib, params = compile_net(
        net,
        target_host="llvm -target=asmjs-unknown-emscripten -system-lib",
        target="opengl",
        data_shape=data_shape,
        params=params)

    # Now we want to deploy our model through RPC.
    # First we ned to prepare the module files locally.
    print("Saving the compiled module...")

    temp = util.tempdir()
    path_obj = temp.relpath("deploy.bc")  # host LLVM part
    path_dso = temp.relpath("deploy.js")  # host JavaScript part
    path_gl = temp.relpath("deploy.gl")  # device GLSL part
    path_json = temp.relpath("deploy.tvm_meta.json")

    lib.save(path_obj)
    emscripten.create_js(path_dso, path_obj, side_module=True)
    lib.imported_modules[0].save(path_gl)

    print("- Saved files:", temp.listdir())

    # Connect to the RPC server.
    print("Connecting to RPC server...")
    proxy_host = 'localhost'
    proxy_port = 9090
    remote = rpc.connect(proxy_host, proxy_port, key="js")
    print("- Connected to RPC server!")

    # Upload module to RPC server.
    print("Uploading module to RPC server...")
    remote.upload(path_dso, "deploy.dso")
    remote.upload(path_gl)
    remote.upload(path_json)
    print("- Upload completed!")

    # Load remote library.
    print("Loading remote library...")
    fdev = remote.load_module("deploy.gl")
    fhost = remote.load_module("deploy.dso")
    fhost.import_module(fdev)
    rlib = fhost
    print("- Remote library loaded!")

    ctx = remote.opengl(0)

    # Upload the parameters.
    print("Uploading parameters...")
    rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}
    print("- Parameters uploaded!")

    # Create the remote runtime module.
    print("Running remote module...")
    from tvm.contrib import graph_runtime
    module = graph_runtime.create(graph, rlib, ctx)

    # Set parameter.
    module.set_input(**rparams)

    # Set input data.
    input_data = np.random.uniform(size=data_shape)
    module.set_input('data', tvm.nd.array(input_data.astype('float32')))

    # Run.
    module.run()
    print("- Remote module execution completed!")

    out = module.get_output(0, out=tvm.nd.empty(out_shape, ctx=ctx))
    # Print first 10 elements of output.
    print(out.asnumpy()[0][0:10])
Ejemplo n.º 35
0
##---------------------------------
## Under debugging
##---------------------------------

import cv2
import time

inshape = (1,3,224,224)
outshape = (1,1000)

basename="models/alexnet/deploy_rasp"
loaded_params = bytearray(open(basename+".params", "rb").read())

# connect the server
remote = rpc.connect("raspberrypi.local", 9090)

# upload the library to remote device and load it
lib_fname='models/alexnet/deploy_rasp.o'
print("uploading")
remote.upload(lib_fname)
print("loading module")
rlib = remote.load_module("deploy_rasp.o")
print("loading graph")
graph = open(basename+".json").read()

ctx = remote.cpu(0)
# upload the parameter
print("loading paramdict")
params = nnvm.compiler.load_param_dict(loaded_params)
print("converting paramdict")
Ejemplo n.º 36
0
def test_gemm_gpu(N, times, bn, num_block, num_thread):
    assert(bn <= N)
    assert(num_thread * num_thread * 16 <= N)
    assert(num_block * num_block * 2 <= N)
    A = tvm.placeholder((N, N), name='A')
    B = tvm.placeholder((N, N), name='Btmp')
    k = tvm.reduce_axis((0, N), name='k')

    packedB = tvm.compute((N, N / bn, bn),
              lambda x, y, z: B[x, y * bn + z], name = 'B')

    C = tvm.compute(
        (N, N),
        lambda ii, jj: tvm.sum(A[ii, k] * packedB[k, jj / bn, jj % bn], axis=k),
        name='C')

    s = tvm.create_schedule(C.op)
    CC = s.cache_write(C, "local")

    block_x = tvm.thread_axis("blockIdx.x")
    block_y = tvm.thread_axis("blockIdx.y")
    thread_x = tvm.thread_axis("threadIdx.x")
    thread_y = tvm.thread_axis("threadIdx.y")

    thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx")
    thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy")

    pby, pbi = s[packedB].split(packedB.op.axis[0], nparts=num_thread)
    pbx, pbj = s[packedB].split(packedB.op.axis[1], nparts=num_thread)
    s[packedB].bind(pby, thread_y)
    s[packedB].bind(pbx, thread_x)
    pbz, pbk = s[packedB].split(packedB.op.axis[2], factor=8)
    s[packedB].vectorize(pbk)

    by, yi = s[C].split(C.op.axis[0], nparts=num_block)
    bx, xi = s[C].split(C.op.axis[1], nparts=num_thread)

    s[C].bind(by, block_y)
    s[C].bind(bx, thread_y)
    s[C].reorder(by, bx, yi, xi)

    tyz, yi = s[C].split(yi, nparts=2)
    ty, yi = s[C].split(yi, nparts=num_block)
    txz, xi = s[C].split(xi, nparts=2)
    tx, xi = s[C].split(xi, nparts=num_thread)

    s[C].reorder(tyz, txz, ty, tx, yi, xi)
    s[C].bind(tyz, thread_yz)
    s[C].bind(txz, thread_xz)

    s[C].bind(ty, block_x)
    s[C].bind(tx, thread_x)

    xyi, xxi = s[C].split(xi, factor=8)
    s[C].reorder(tyz, txz, ty, tx, yi, xyi, xxi)
    s[C].vectorize(xxi)

    s[CC].compute_at(s[C], yi)
    yo, xo = CC.op.axis
    s[CC].reorder(k, yo, xo)
    xo, xi = s[CC].split(xo, factor=8)
    s[CC].vectorize(xi)

    ko, ki = s[CC].split(k, factor=2)
    s[CC].unroll(ki)

    print(tvm.lower(s, [A, B, C], simple_mode=True))

    f = tvm.build(s, [A, B, C], "opencl", target_host=target, name="gemm_gpu")
    temp = util.tempdir()   
    path_dso = temp.relpath("gemm_gpu.so")
    f.export_library(path_dso, ndk.create_shared)

    # connect to the proxy
    remote = rpc.connect(proxy_host, proxy_port, key=key)
    ctx = remote.cl(0)
    remote.upload(path_dso)
    f = remote.load_module("gemm_gpu.so")

    evaluate(f, ctx, N, times)
Ejemplo n.º 37
0
#   It is recommended to set target triple and feature set to contain specific
#   feature available, so we can take full advantage of the features of the
#   board.
#   You can find more details about cross compilation attributes from
#   `LLVM guide of cross compilation <https://clang.llvm.org/docs/CrossCompilation.html>`_.

######################################################################
# Run Kernel Remotely by RPC
# --------------------------
# Here we will show you how to run the kernel on the remote device:

# replace host with the ip address of your device
host = '0.0.0.0'
port = 9090
# connect the remote device
remote = rpc.connect(host, port)

######################################################################
# Here we upload the lib to the remote device, then invoke a device local
# compiler for shared lib and load it into device memory. now `f` is a
# remote module object.
remote.upload(path)
f = remote.load_module('mylib.o')

# create array on the remote device
ctx = remote.cpu(0)
a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
# the function will run on the remote device
f(a, b)
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
Ejemplo n.º 38
0
def test_rpc_remote_module():
    if not tvm.module.enabled("rpc"):
        return
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    s = tvm.create_schedule(B.op)

    def check_remote():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        temp = util.tempdir()
        ctx = remote.cpu(0)
        f = tvm.build(s, [A, B], "llvm", name="myadd")
        path_dso = temp.relpath("dev_lib.so")
        f.export_library(path_dso)
        remote.upload(path_dso)
        f1 = remote.load_module("dev_lib.so")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, remote.cpu(0), number=10)
        cost = time_f(a, b).mean
        print('%g secs/op' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    def check_remote_link_cl():
        """Test function to run remote code such as cl

        This is not enabled because there is forking issue
        of TVM runtime when server launches after OpenCL
        runtime initializes. We leave it as an example
        on how to do rpc when we want to do linking on remote.
        """
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        if not tvm.module.enabled("opencl"):
            print("Skip because opencl is not enabled")
            return
        temp = util.tempdir()
        ctx = remote.cl(0)
        s = tvm.create_schedule(B.op)
        xo, xi = s[B].split(B.op.axis[0], factor=32)
        s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
        s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
        f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd")
        # Option 1: save modules separately and rely on remote compiler
        path_o = temp.relpath("myadd.o")
        path_cl = temp.relpath("myadd.cl")
        path_json = temp.relpath("myadd.tvm_meta.json")
        f.save(path_o)
        f.imported_modules[0].save(path_cl)
        remote.upload(path_o)
        remote.upload(path_cl)
        # upload meta data
        remote.upload(path_json)
        fhost = remote.load_module("myadd.o")
        fdev = remote.load_module("myadd.cl")
        fhost.import_module(fdev)
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        fhost(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
        # Option 2: export library as a tar ball then handled by remote compiler
        path_tar = temp.relpath("myadd.tar")
        f.export_library(path_tar)
        remote.upload(path_tar)
        fhost = remote.load_module("myadd.tar")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        fhost(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    check_remote()
Ejemplo n.º 39
0
def test_rpc_remote_module():
    if not tvm.module.enabled("rpc"):
        return
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n, ), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    s = tvm.create_schedule(B.op)

    def check_remote():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        temp = util.tempdir()
        ctx = remote.cpu(0)
        f = tvm.build(s, [A, B], "llvm", name="myadd")
        path_dso = temp.relpath("dev_lib.so")
        f.export_library(path_dso)
        remote.upload(path_dso)
        f1 = remote.load_module("dev_lib.so")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, remote.cpu(0), number=10)
        cost = time_f(a, b).mean
        print('%g secs/op' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    def check_remote_link_cl():
        """Test function to run remote code such as cl

        This is not enabled because there is forking issue
        of TVM runtime when server launches after OpenCL
        runtime initializes. We leave it as an example
        on how to do rpc when we want to do linking on remote.
        """
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        if not tvm.module.enabled("opencl"):
            print("Skip because opencl is not enabled")
            return
        temp = util.tempdir()
        ctx = remote.cl(0)
        s = tvm.create_schedule(B.op)
        xo, xi = s[B].split(B.op.axis[0], factor=32)
        s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
        s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
        f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd")
        # Option 1: save modules separately and rely on remote compiler
        path_o = temp.relpath("myadd.o")
        path_cl = temp.relpath("myadd.cl")
        path_json = temp.relpath("myadd.tvm_meta.json")
        f.save(path_o)
        f.imported_modules[0].save(path_cl)
        remote.upload(path_o)
        remote.upload(path_cl)
        # upload meta data
        remote.upload(path_json)
        fhost = remote.load_module("myadd.o")
        fdev = remote.load_module("myadd.cl")
        fhost.import_module(fdev)
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        fhost(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
        # Option 2: export library as a tar ball then handled by remote compiler
        path_tar = temp.relpath("myadd.tar")
        f.export_library(path_tar)
        remote.upload(path_tar)
        fhost = remote.load_module("myadd.tar")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        fhost(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    check_remote()