Esempio n. 1
0
def test_bigendian_rpc_param():
    """Test big endian rpc when there is a PowerPC RPC server available.

    The test returns immediately unless ``TVM_POWERPC_TEST_HOST`` is set.
    ``TVM_POWERPC_TEST_PORT`` selects the port and defaults to 9090.
    """
    host = os.environ.get("TVM_POWERPC_TEST_HOST", None)
    if host is None:
        return
    # Environment variables are strings while the default is an int;
    # normalise so rpc.connect always receives an integer port.
    port = int(os.environ.get("TVM_POWERPC_TEST_PORT", 9090))

    def verify_graph_runtime(remote, target, shape, dtype):
        """Build ``x + 1`` with relay, run it remotely and compare with numpy."""
        x = relay.var('x')
        y = relay.const(1)
        z = relay.add(x, y)
        func = relay.Function([x], z)

        x_in = np.ones(shape).astype(dtype)
        params = {'x': x_in}
        graph, lib, params = relay.build(func, target=target, params=params)

        temp = util.tempdir()
        path_dso = temp.relpath("dev_lib.o")
        lib.save(path_dso)
        remote.upload(path_dso)
        lib = remote.load_module("dev_lib.o")
        ctx = remote.cpu(0)
        mod = graph_runtime.create(graph, lib, ctx)
        # Parameters go through the serialized param dict on purpose: this is
        # the path the test is meant to exercise against the remote target.
        mod.load_params(relay.save_param_dict(params))
        mod.run()
        out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
        tvm.testing.assert_allclose(x_in + 1, out.asnumpy())

    print("Test RPC connection to PowerPC...")
    remote = rpc.connect(host, port)
    target = "llvm -mtriple=powerpc-linux-gnu"
    for dtype in ["float32", "float64", "int32", "int8"]:
        verify_graph_runtime(remote, target, (10,), dtype)
Esempio n. 2
0
def test_rpc_array():
    """Build an "add one" kernel for emscripten and run it through the "js" RPC proxy."""
    if not tvm.module.enabled("rpc"):
        return
    # graph
    length = tvm.convert(1024)
    A = tvm.placeholder((length,), name='A')
    B = tvm.compute(A.shape, lambda *idx: A(*idx) + 1.0, name='B')
    sched = tvm.create_schedule(B.op)
    remote = rpc.connect(proxy_host, proxy_port, key="js")
    target = "llvm -target=asmjs-unknown-emscripten -system-lib"

    def check_remote():
        """Compile, upload, time and verify the kernel on the remote CPU."""
        if not tvm.module.enabled(target):
            print("Skip because %s is not enabled" % target)
            return
        workdir = util.tempdir()
        ctx = remote.cpu(0)
        built = tvm.build(sched, [A, B], target, name="myadd")
        bitcode_path = workdir.relpath("dev_lib.bc")
        js_path = workdir.relpath("dev_lib.js")
        built.save(bitcode_path)
        emscripten.create_js(js_path, bitcode_path, side_module=True)
        # Upload under a .dso suffix so it can be loaded remotely
        remote.upload(js_path, "dev_lib.dso")
        # Download is exercised for its own sake; the payload is not inspected.
        remote.download("dev_lib.dso")
        remote_mod = remote.load_module("dev_lib.dso")
        src = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
        dst = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        timer = remote_mod.time_evaluator(
            remote_mod.entry_name, remote.cpu(0), number=10)
        print('%g secs/op' % timer(src, dst).mean)
        np.testing.assert_equal(dst.asnumpy(), src.asnumpy() + 1)

    check_remote()
Esempio n. 3
0
def test_bigendian_rpc():
    """Test big endian rpc when there is a PowerPC RPC server available.

    The test returns immediately unless ``TVM_POWERPC_TEST_HOST`` is set.
    ``TVM_POWERPC_TEST_PORT`` selects the port and defaults to 9090.
    """
    host = os.environ.get("TVM_POWERPC_TEST_HOST", None)
    if host is None:
        return
    # Environment variables are strings while the default is an int;
    # normalise so rpc.connect always receives an integer port.
    port = int(os.environ.get("TVM_POWERPC_TEST_PORT", 9090))

    def verify_rpc(remote, target, shape, dtype):
        """Build an element-wise ``A + 1`` kernel and check it on the remote CPU."""
        A = tvm.placeholder(shape, dtype=dtype)
        B = tvm.compute(A.shape, lambda i: A[i]+tvm.const(1, A.dtype))
        s = tvm.create_schedule(B.op)
        f = tvm.build(s, [A, B], target, name="myadd")

        ctx = remote.cpu(0)
        a = tvm.nd.array(np.random.randint(0, 256, size=shape).astype(A.dtype), ctx=ctx)
        b = tvm.nd.array(np.zeros(shape).astype(A.dtype), ctx=ctx)
        temp = util.tempdir()
        path_dso = temp.relpath("dev_lib.o")
        f.save(path_dso)
        remote.upload(path_dso)
        # From here on `f` is the remotely loaded module, not the local build.
        f = remote.load_module("dev_lib.o")
        f(a, b)
        tvm.testing.assert_allclose(a.asnumpy() + 1, b.asnumpy())

    print("Test RPC connection to PowerPC...")
    remote = rpc.connect(host, port)
    target = "llvm -mtriple=powerpc-linux-gnu"
    for dtype in ["float32", "float64", "int32", "int8"]:
        verify_rpc(remote, target, (10,), dtype)
Esempio n. 4
0
def test_rpc_simple():
    """Call three registered functions through a local RPC server.

    Covers an integer result, an error raised by the callee, and a string
    result.
    """
    if not tvm.module.enabled("rpc"):
        return

    @tvm.register_func("rpc.test.addone")
    def add_one(value):
        return value + 1

    @tvm.register_func("rpc.test.strcat")
    def str_cat(prefix, number):
        return "%s:%d" % (prefix, number)

    @tvm.register_func("rpc.test.except")
    def remote_throw(message):
        raise ValueError("%s" % message)

    server = rpc.Server("localhost", key="x1")
    client = rpc.connect(server.host, server.port, key="x1")

    remote_addone = client.get_function("rpc.test.addone")
    assert remote_addone(10) == 11

    remote_except = client.get_function("rpc.test.except")
    try:
        remote_except("abc")
    except tvm.TVMError as err:
        # The original message must be carried inside the TVMError text.
        assert "abc" in str(err)
    else:
        # Reaching here means the call did not raise at all.
        assert False

    remote_strcat = client.get_function("rpc.test.strcat")
    assert remote_strcat("abc", 11) == "abc:11"
Esempio n. 5
0
    def build_arm():
        """Cross-compile the schedule for ARMv7 and optionally run it over RPC.

        The object file is passed to ``verify_elf``; if ``TVM_RPC_ARM_HOST``
        is set (together with ``TVM_RPC_ARM_PORT``) and the board is
        reachable, the kernel is also executed remotely and compared with
        numpy. An unreachable board only skips the remote part.
        """
        target = "llvm -target=armv7-none-linux-gnueabihf"
        if not tvm.module.enabled(target):
            print("Skip because %s is not enabled.." % target)
            return
        temp = util.tempdir()
        f = tvm.build(s, [A, B, C], target)
        path = temp.relpath("myadd.o")
        f.save(path)
        # NOTE(review): 0x28 is presumably the ELF machine id expected for
        # ARM -- confirm against verify_elf.
        verify_elf(path, 0x28)
        asm_path = temp.relpath("myadd.asm")
        f.save(asm_path)
        # Do a RPC verification, launch kernel on Arm Board if available.
        host = os.environ.get('TVM_RPC_ARM_HOST', None)
        remote = None
        if host:
            port = int(os.environ['TVM_RPC_ARM_PORT'])
            try:
                remote = rpc.connect(host, port)
            except tvm.TVMError as e:
                # Best effort: the board is optional, but report why the
                # remote verification is skipped instead of hiding it.
                print("Skip remote verification, cannot connect to %s:%d: %s"
                      % (host, port, e))

        if remote:
            remote.upload(path)
            farm = remote.load_module("myadd.o")
            ctx = remote.cpu(0)
            n = nn
            a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
            b = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
            c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
            farm(a, b, c)
            tvm.testing.assert_allclose(
                c.asnumpy(), a.asnumpy() + b.asnumpy())
            print("Verification finish on remote..")
def run_opencl():
    """Build the "add one" kernel for OpenCL and run it on a remote device over RPC.

    Relies on ``A``, ``B`` and ``temp`` being defined by the surrounding
    script.
    """
    # NOTE: This is the setting for my rk3399 board. You need to modify
    # them according to your environment.
    target_host = "llvm -target=aarch64-linux-gnu"
    opencl_device_host = '10.77.1.145'
    opencl_device_port = 9090

    # create schedule for the above "add one" compute declaration
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=32)
    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
    s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
    func = tvm.build(s, [A, B], "opencl", target_host=target_host)

    remote = rpc.connect(opencl_device_host, opencl_device_port)

    # export and upload
    path = temp.relpath('lib_cl.tar')
    func.export_library(path)
    remote.upload(path)
    # From here on `func` is the remotely loaded module.
    func = remote.load_module('lib_cl.tar')

    # run
    ctx = remote.cl()
    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    func(a, b)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    print("OpenCL test passed!")
Esempio n. 7
0
def test_rpc_file_exchange():
    """Upload a random byte blob to a local RPC server and download it back unchanged."""
    if not tvm.module.enabled("rpc"):
        return
    server = rpc.Server("localhost")
    session = rpc.connect(server.host, server.port)
    payload = bytearray(np.random.randint(0, 10, size=10))
    session.upload(payload, "dat.bin")
    received = session.download("dat.bin")
    assert received == payload
Esempio n. 8
0
def program_rpc_bitstream(path=None):
    """Connect to the RPC server and program its FPGA.

    Parameters
    ----------
    path : path to bitstream (optional)
        Forwarded unchanged to ``program_fpga``.
    """
    assert tvm.module.enabled("rpc")
    session = rpc.connect(host, port)
    program_fpga(session, path)
Esempio n. 9
0
def test_rpc_return_func():
    """A function fetched over RPC can itself return a callable."""
    @tvm.register_func("rpc.test.remote_func")
    def make_adder(x):
        return lambda y: x+y

    server = rpc.Server("localhost", key="x1")
    session = rpc.connect(server.host, server.port, key="x1")
    remote_factory = session.get_function("rpc.test.remote_func")
    add_ten = remote_factory(10)
    assert add_ten(12) == 22
Esempio n. 10
0
def test_rpc_module():
    """Build Metal and CPU "add one" dylibs, then time and check both over the RPC proxy.

    Relies on ``target``, ``arch``, ``sdk``, ``proxy_host``, ``proxy_port``,
    ``key`` and ``destination`` being defined by the surrounding script.
    """
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    temp = util.tempdir()
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
    # Build the dynamic lib.
    # If we don't want to do metal and only use cpu, pass `target` instead of
    # "metal" as the device target.
    f = tvm.build(s, [A, B], "metal", target_host=target, name="myadd")
    path_dso1 = temp.relpath("dev_lib.dylib")
    f.export_library(path_dso1, xcode.create_dylib,
                     arch=arch, sdk=sdk)
    xcode.codesign(path_dso1)

    # Second build: same computation with a fresh CPU-parallel schedule.
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].parallel(xi)
    s[B].pragma(xo, "parallel_launch_point")
    s[B].pragma(xi, "parallel_barrier_when_finish")
    f = tvm.build(s, [A, B], target, name="myadd_cpu")
    path_dso2 = temp.relpath("cpu_lib.dylib")
    f.export_library(path_dso2, xcode.create_dylib,
                     arch=arch, sdk=sdk)
    xcode.codesign(path_dso2)

    # Start RPC test server that contains the compiled library.
    # The handle is kept in a local even though it is not used afterwards.
    server = xcode.popen_test_rpc(proxy_host, proxy_port, key,
                                  destination=destination,
                                  libs=[path_dso1, path_dso2])

    # connect to the proxy
    remote = rpc.connect(proxy_host, proxy_port, key=key)
    ctx = remote.metal(0)
    f1 = remote.load_module("dev_lib.dylib")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    # CPU
    ctx = remote.cpu(0)
    f2 = remote.load_module("cpu_lib.dylib")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    # Time the CPU module through its own entry name (was f1.entry_name).
    time_f = f2.time_evaluator(f2.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
Esempio n. 11
0
def try_remote_save_load():
    """Build an OpenGL "myadd" kernel, ship its files through the "js" RPC proxy and run it.

    Returns without doing anything unless the "rpc", "opengl" and "llvm"
    modules are all enabled.
    """
    for required in ("rpc", "opengl", "llvm"):
        if not tvm.module.enabled(required):
            return

    # Build the module.
    size = tvm.var("n")
    A = tvm.placeholder((size,), name='A')
    B = tvm.placeholder((size,), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
    sched = tvm.create_schedule(C.op)
    sched[C].opengl()
    built = tvm.build(
        sched, [A, B, C], "opengl",
        target_host="llvm -target=asmjs-unknown-emscripten -system-lib",
        name="myadd")

    remote = rpc.connect(proxy_host, proxy_port, key="js")

    workdir = util.tempdir()
    ctx = remote.opengl(0)
    obj_path = workdir.relpath("myadd.bc")
    js_path = workdir.relpath("myadd.js")
    gl_path = workdir.relpath("myadd.gl")
    meta_path = workdir.relpath("myadd.tvm_meta.json")

    # Host part goes through emscripten; the first imported module is saved
    # separately under the .gl name.
    built.save(obj_path)
    emscripten.create_js(js_path, obj_path, side_module=True)
    built.imported_modules[0].save(gl_path)

    remote.upload(js_path, "myadd.dso")
    remote.upload(gl_path)
    remote.upload(meta_path)

    # Pull every uploaded file back once; the contents are not inspected.
    for remote_name in ("myadd.dso", "myadd.gl", "myadd.tvm_meta.json"):
        remote.download(remote_name)

    print('Loading myadd.dso')
    host_mod = remote.load_module("myadd.dso")

    print('Loading myadd.gl')
    dev_mod = remote.load_module("myadd.gl")

    print('import_module')
    host_mod.import_module(dev_mod)

    print('running...')
    lhs = tvm.nd.array(np.random.uniform(size=16).astype(A.dtype), ctx)
    rhs = tvm.nd.array(np.zeros(16, dtype=A.dtype), ctx)
    result = tvm.nd.array(np.zeros(16, dtype=C.dtype), ctx)
    host_mod(lhs, rhs, result)
    tvm.testing.assert_allclose(result.asnumpy(), lhs.asnumpy() + rhs.asnumpy())
Esempio n. 12
0
def run(run_func):
    """Run test function on all available env.

    Parameters
    ----------
    run_func : function(env, remote)
        Callback invoked with the environment returned by ``get_env()`` and
        an RPC session chosen according to ``env.TARGET``.

    Raises
    ------
    RuntimeError
        If the target is "pynq" and the VTA_PYNQ_RPC_HOST and
        VTA_PYNQ_RPC_PORT environment variables are not both set.
    """
    env = get_env()

    if env.TARGET == "sim":

        # Talk to local RPC if necessary to debug RPC server.
        # Compile vta on your host with make at the root.
        # Make sure TARGET is set to "sim" in the config.json file.
        # Then launch the RPC server on the host machine
        # with ./apps/pynq_rpc/start_rpc_server.sh
        # Set your VTA_LOCAL_SIM_RPC environment variable to
        # the port it's listening to, e.g. 9090
        local_rpc = int(os.environ.get("VTA_LOCAL_SIM_RPC", "0"))
        if local_rpc:
            remote = rpc.connect("localhost", local_rpc)
            run_func(env, remote)
        else:
            # Make sure simulation library exists
            # If this fails, build vta on host (make)
            # with TARGET="sim" in the json.config file.
            assert simulator.enabled()
            run_func(env, rpc.LocalSession())

    elif env.TARGET == "pynq":

        # Run on PYNQ if env variable exists
        host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
        port_str = os.environ.get("VTA_PYNQ_RPC_PORT", None)
        # Convert only when the variable is set: int(None) raises TypeError,
        # which used to pre-empt the explanatory RuntimeError below.
        port = int(port_str) if port_str else 0
        if host and port:
            remote = rpc.connect(host, port)
            run_func(env, remote)
        else:
            raise RuntimeError(
                "Please set the VTA_PYNQ_RPC_HOST and VTA_PYNQ_RPC_PORT environment variables")
Esempio n. 13
0
def test_rpc_array():
    """Send an ndarray through a local RPC session and verify it on both ends of the call."""
    if not tvm.module.enabled("rpc"):
        return
    expected = np.random.randint(0, 10, size=(3, 4))

    @tvm.register_func("rpc.test.remote_array_func")
    def verify_array(arr):
        np.testing.assert_equal(arr.asnumpy(), expected)

    server = rpc.Server("localhost")
    session = rpc.connect(server.host, server.port)
    remote_arr = tvm.nd.array(expected, session.cpu(0))
    assert str(remote_arr.context).startswith("remote")
    np.testing.assert_equal(remote_arr.asnumpy(), expected)
    remote_check = session.get_function("rpc.test.remote_array_func")
    remote_check(remote_arr)
Esempio n. 14
0
 def check_remote():
     """Export the llvm build, load it over a local RPC session and run the graph there.

     Relies on ``s``, ``A``, ``B``, ``graph`` and ``n`` from the enclosing scope.
     """
     if not tvm.module.enabled("llvm"):
         print("Skip because llvm is not enabled")
         return
     host_lib = tvm.build(s, [A, B], "llvm", name="myadd")
     server = rpc.Server("localhost")
     remote = rpc.connect(server.host, server.port)
     workdir = util.tempdir()
     ctx = remote.cpu(0)
     so_path = workdir.relpath("dev_lib.so")
     host_lib.export_library(so_path)
     remote.upload(so_path)
     remote_lib = remote.load_module("dev_lib.so")
     mod = graph_runtime.create(graph, remote_lib, remote.cpu(0))
     data = np.random.uniform(size=(n,)).astype(A.dtype)
     mod.run(x=tvm.nd.array(data, ctx))
     result = mod.get_output(0, tvm.nd.empty((n,), ctx=ctx))
     np.testing.assert_equal(result.asnumpy(), data + 1)
Esempio n. 15
0
 def check():
     """Call two registered functions through the websocket RPC proxy.

     Relies on ``proxy``, ``prox`` and ``web_port`` from the enclosing scope.
     """
     if not tvm.module.enabled("rpc"):
         return
     @tvm.register_func("rpc.test2.addone")
     def addone(x):
         return x + 1
     # Distinct name: the original reused `addone`, shadowing the function above.
     @tvm.register_func("rpc.test2.strcat")
     def strcat(name, x):
         return "%s:%d" % (name, x)
     server = multiprocessing.Process(
         target=proxy.websocket_proxy_server,
         args=("ws://localhost:%d/ws" % web_port, "x1"))
     # Need to make sure that the connection start after proxy comes up
     time.sleep(0.1)
     # `daemon` (previously misspelled `deamon`, which had no effect) must be
     # set before start() so the helper process does not outlive the test run.
     server.daemon = True
     server.start()
     client = rpc.connect(prox.host, prox.port, key="x1")
     f1 = client.get_function("rpc.test2.addone")
     assert f1(10) == 11
     f2 = client.get_function("rpc.test2.strcat")
     assert f2("abc", 11) == "abc:11"
Esempio n. 16
0
def test_rpc_executor():
    """Build ``exp(x + y)`` with nnvm, run it on a popen RPC server and compare with numpy.

    The server is always terminated, including when a step or assertion
    fails.
    """
    host = "localhost"
    port = 9021
    server = rpc.Server(host, port, use_popen=True)
    try:
        # Fixed pause after launching the popen server, before building and
        # connecting.
        time.sleep(1)
        x = sym.Variable("x")
        y = sym.Variable("y")
        z = sym.exp(y + x)
        shape = (10, 128)
        dtype = tvm.float32
        shape_dict = {"x": shape, "y": shape}
        tmp = util.tempdir()
        lib_name = tmp.relpath("net.o")

        graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
        # save module
        lib.save(lib_name)
        remote = rpc.connect(host, port)
        remote.upload(lib_name)
        ctx = remote.cpu(0)
        # load remote
        rlib = remote.load_module("net.o")

        # Create remotemodule
        m = graph_runtime.create(graph, rlib, remote.cpu(0))
        # get member functions
        set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
        na = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
        nb = tvm.nd.array(np.ones(shape).astype(dtype), ctx)
        # set inputs
        set_input("x", na)
        set_input("y", nb)
        # execute
        run()
        # get outputs
        out = tvm.nd.empty(shape, dtype, ctx)
        get_output(0, out)
        tvm.testing.assert_allclose(
            out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
    finally:
        # Previously skipped on any failure, leaving the subprocess running
        # on the fixed port.
        server.terminate()
Esempio n. 17
0
def test_rpc_return_ndarray():
    """Check the use count of an NDArray that is returned through RPC calls.

    A registered function hands out closures over one shared array ``nd``.
    The client fetches the array twice, writes through one handle, reads
    through the other, and checks the count reported by
    ``tvm._api_internal._ndarray_use_count`` before, during and after the
    handles are held.
    """
    # Use closure to check the ref counter correctness
    nd = tvm.nd.array(np.zeros(10).astype("float32"))
    @tvm.register_func("rpc.test.remote_return_nd")
    def my_module(name):
        # Dispatch on the requested name; every branch returns a closure.
        # An unrecognized name falls through and returns None.
        if name == "get_arr":
            return lambda : nd
        elif name == "ref_count":
            return lambda : tvm._api_internal._ndarray_use_count(nd)
        elif name == "get_elem":
            return lambda idx: nd.asnumpy()[idx]
        elif name == "get_arr_elem":
            return lambda arr, idx: arr.asnumpy()[idx]

    # start server
    server = rpc.Server("localhost", key="x1")
    client = rpc.connect(server.host, server.port, key="x1")
    m = client.get_function("rpc.test.remote_return_nd")
    get_arr = m("get_arr")
    ref_count = m("ref_count")
    get_elem = m("get_elem")
    get_arr_elem = m("get_arr_elem")
    # array test
    def run_arr_test():
        # Each fetched handle is expected to add one to the use count.
        arr = get_arr()
        assert ref_count() == 2
        arr2 = get_arr()
        assert ref_count() == 3
        assert arr.context == client.cpu(0)
        # Write through one handle, then read back through the second handle
        # and through the two element accessors.
        arr.copyfrom(np.ones(10).astype(arr.dtype))
        assert arr2.asnumpy()[0] == 1.0
        assert get_elem(0) == 1.0
        assert get_arr_elem(arr2, 0) == 1.0

    # Only `nd` itself is expected to be counted before any handle is fetched.
    assert ref_count() == 1
    run_arr_test()
    # check recycle correctness
    # (arr and arr2 were locals of run_arr_test and are gone by now)
    assert ref_count() == 1
Esempio n. 18
0
def test_bigendian_rpc_param():
    """Test big endian rpc when there is a PowerPC RPC server available.

    The test returns immediately unless ``TVM_POWERPC_TEST_HOST`` is set.
    ``TVM_POWERPC_TEST_PORT`` selects the port and defaults to 9090.
    """
    host = os.environ.get("TVM_POWERPC_TEST_HOST", None)
    if host is None:
        return
    # Environment variables are strings while the default is an int;
    # normalise so rpc.connect always receives an integer port.
    port = int(os.environ.get("TVM_POWERPC_TEST_PORT", 9090))

    def verify_nnvm(remote, target, shape, dtype):
        """Build ``x + 1`` with nnvm, run it remotely and compare with numpy."""
        x = nnvm.sym.Variable("x")
        y = x + 1
        graph, lib, _ = nnvm.compiler.build(
            y, target,
            shape={"x": shape},
            dtype={"x": dtype})

        temp = util.tempdir()
        path_dso = temp.relpath("dev_lib.o")
        lib.save(path_dso)
        remote.upload(path_dso)
        lib = remote.load_module("dev_lib.o")
        a = np.random.randint(0, 256, size=shape).astype(dtype)
        # The random values are overwritten, so the input is all ones.
        a[:] = 1
        params = {"x" : a}
        ctx = remote.cpu(0)
        m = graph_runtime.create(graph, lib, ctx)
        # uses save param_dict
        m.load_params(nnvm.compiler.save_param_dict(params))
        m.run()
        out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
        tvm.testing.assert_allclose(a + 1, out.asnumpy())

    print("Test RPC connection to PowerPC...")
    remote = rpc.connect(host, port)
    target = "llvm -mtriple=powerpc-linux-gnu"
    for dtype in ["float32", "float64", "int32", "int8"]:
        verify_nnvm(remote, target, (10,), dtype)
Esempio n. 19
0
def run_model():
    """Compile the global network, run it on a remote device over RPC and benchmark it.

    Reads the module-level ``args``, ``net``, ``params``, ``target``,
    ``log_file`` and ``model``; the remote address comes from the ``PI`` and
    ``PORT`` environment variables. The TVM output is compared with the
    TensorFlow model output, and timing results are appended to
    ``data/end2end.csv``.
    """
    # Local import: only this function needs it.
    import ast

    kernelstr = " no kernel" if args.nokernel else ""
    print("A", args.activation_bits, "W", args.weight_bits, kernelstr, sep="")
    global net, params

    # NOTE: this rebinds the global `net`, so the function is not re-runnable
    # as written.
    net = net[net.entry_func]
    # compile kernels with history best records.
    with autotvm.apply_history_best(log_file):
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                net, target=target, params=params)

        # Upload module to device
        host = os.environ['PI']
        port = int(os.environ['PORT'])
        remote = rpc.connect(host, port)
        ctx = remote.cpu()

        # export library
        tmp = util.tempdir()
        lib_fname = tmp.relpath('net.tar')
        lib.export_library(lib_fname)

        # upload the library to remote device and load it
        remote.upload(lib_fname)
        rlib = remote.load_module('net.tar')

        # create the remote runtime module
        module = runtime.create(graph, rlib, ctx)

        # set parameter (upload params to the remote device. This may take a while)
        data = get_image()
        module.set_input(**params)

        synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
                              '4d0b62f3d01426887599d4f7ede23ee5/raw/',
                              '596b27d23537e5a1b5751d2b0481ef172f58b539/',
                              'imagenet1000_clsid_to_human.txt'])
        synset_name = 'imagenet1000_clsid_to_human.txt'
        synset_path = download_testdata(synset_url, synset_name, module='data')
        with open(synset_path) as f:
            # The file is downloaded from the network, so parse it as a plain
            # Python literal instead of executing it with eval().
            synset = ast.literal_eval(f.read())

        # Confirm correctness with tf model
        test_input = tf.constant(data.astype('float32'))
        output = model(test_input)
        top1_tf = np.argmax(output[0].numpy())
        print('TF top-1 id: {}, class name: {}'.format(top1_tf, synset[top1_tf]))

        if args.nokernel:
            # Axes reordered as (0, 3, 1, 2) for the no-kernel build.
            data = data.transpose((0, 3, 1, 2))
        module.set_input('input_1', data)
        module.run()
        tvm_out = module.get_output(0)
        top1_tvm = np.argmax(tvm_out.asnumpy()[0])
        print('RPI top-1 id: {}, class name: {}'.format(top1_tvm, synset[top1_tvm]))

        # Check the actual vector output is within fp error
        np.testing.assert_allclose(output, tvm_out.asnumpy(), rtol=1e-3)

        # Benchmark time
        print("Evaluate inference time cost...")
        # NOTE(review): `repeats` is not defined in this function; presumably
        # a module-level value distinct from args.repeats -- confirm.
        ftimer = module.module.time_evaluator("run", ctx, number=repeats, repeat=args.repeats)
        prof_res = np.array(ftimer().results) * 1000  # Convert to milliseconds
        mean_ms = np.mean(prof_res)
        std_dev_ms = np.std(prof_res)
        print("Mean inference time (std dev): %.2f ms (%.2f ms)\n" %
              (mean_ms, std_dev_ms))

        with open("data/end2end.csv", "a") as f:
            ukernel = "no" if args.nokernel else "yes"
            f.write("ARM,A%dW%d,%s,%f,%f\n" % (args.activation_bits,
                args.weight_bits, ukernel, mean_ms, std_dev_ms))
Esempio n. 20
0
def test_can_call_remote_function_with_rpc_standalone(host, port):
    """Call ``runtime.GetFFIString`` over a direct RPC connection and expect the input echoed."""
    session = rpc.connect(host, port)
    get_ffi_string = session.get_function("runtime.GetFFIString")
    greeting = "hello"
    assert get_ffi_string("hello") == greeting
Esempio n. 21
0
def test_gemm_gpu(N, times, bn, num_block, num_thread):
    """Schedule a packed square GEMM for OpenCL, build it and evaluate it on a remote device.

    Parameters
    ----------
    N : int
        Side length of the square matrices.
    times : int
        Forwarded to ``evaluate``.
    bn : int
        Packing block size for ``B``; must not exceed ``N``.
    num_block, num_thread : int
        Split factors used when binding the block and thread axes.

    Relies on ``target``, ``proxy_host``, ``proxy_port``, ``key`` and
    ``evaluate`` being defined by the surrounding script.
    """
    assert bn <= N
    assert num_thread * num_thread * 16 <= N
    assert num_block * num_block * 2 <= N
    A = te.placeholder((N, N), name="A")
    B = te.placeholder((N, N), name="Btmp")
    k = te.reduce_axis((0, N), name="k")

    # Floor division keeps the shape integral; `N / bn` yields a float in Python 3.
    packedB = te.compute((N, N // bn, bn), lambda x, y, z: B[x, y * bn + z], name="B")

    # Index arithmetic must be integer as well, hence `jj // bn` rather than `jj / bn`.
    C = te.compute(
        (N, N), lambda ii, jj: te.sum(A[ii, k] * packedB[k, jj // bn, jj % bn], axis=k), name="C"
    )

    s = te.create_schedule(C.op)
    CC = s.cache_write(C, "local")

    block_x = te.thread_axis("blockIdx.x")
    block_y = te.thread_axis("blockIdx.y")
    thread_x = te.thread_axis("threadIdx.x")
    thread_y = te.thread_axis("threadIdx.y")

    thread_xz = te.thread_axis((0, 2), "vthread", name="vx")
    thread_yz = te.thread_axis((0, 2), "vthread", name="vy")

    # Packing stage: bind the outer splits to threads, vectorize the innermost.
    pby, pbi = s[packedB].split(packedB.op.axis[0], nparts=num_thread)
    pbx, pbj = s[packedB].split(packedB.op.axis[1], nparts=num_thread)
    s[packedB].bind(pby, thread_y)
    s[packedB].bind(pbx, thread_x)
    pbz, pbk = s[packedB].split(packedB.op.axis[2], factor=8)
    s[packedB].vectorize(pbk)

    # Output stage: outer tiling.
    by, yi = s[C].split(C.op.axis[0], nparts=num_block)
    bx, xi = s[C].split(C.op.axis[1], nparts=num_thread)

    s[C].bind(by, block_y)
    s[C].bind(bx, thread_y)
    s[C].reorder(by, bx, yi, xi)

    tyz, yi = s[C].split(yi, nparts=2)
    ty, yi = s[C].split(yi, nparts=num_block)
    txz, xi = s[C].split(xi, nparts=2)
    tx, xi = s[C].split(xi, nparts=num_thread)

    s[C].reorder(tyz, txz, ty, tx, yi, xi)
    s[C].bind(tyz, thread_yz)
    s[C].bind(txz, thread_xz)

    s[C].bind(ty, block_x)
    s[C].bind(tx, thread_x)

    xyi, xxi = s[C].split(xi, factor=8)
    s[C].reorder(tyz, txz, ty, tx, yi, xyi, xxi)
    s[C].vectorize(xxi)

    # Local accumulator: computed at `yi`, reduction outermost, inner axis vectorized.
    s[CC].compute_at(s[C], yi)
    yo, xo = CC.op.axis
    s[CC].reorder(k, yo, xo)
    xo, xi = s[CC].split(xo, factor=8)
    s[CC].vectorize(xi)

    ko, ki = s[CC].split(k, factor=2)
    s[CC].unroll(ki)

    print(tvm.lower(s, [A, B, C], simple_mode=True))

    f = tvm.build(s, [A, B, C], "opencl", target_host=target, name="gemm_gpu")
    temp = utils.tempdir()
    path_dso = temp.relpath("gemm_gpu.so")
    f.export_library(path_dso, ndk.create_shared)

    # connect to the proxy
    remote = rpc.connect(proxy_host, proxy_port, key=key)
    ctx = remote.cl(0)
    remote.upload(path_dso)
    # From here on `f` is the remotely loaded module.
    f = remote.load_module("gemm_gpu.so")

    evaluate(f, ctx, N, times)
Esempio n. 22
0
# Write the compiled library to lib_fname so it can be uploaded below.
lib.export_library(lib_fname)

######################################################################
# Deploy the Model Remotely by RPC
# --------------------------------
# With RPC, you can deploy the model remotely from your host machine
# to the remote device.

# Obtain an RPC session: a local session for the demo, otherwise a
# connection to the remote device.
if local_demo:
    remote = rpc.LocalSession()
else:
    # The following is my environment, change this to the IP address of your target device
    host = '10.77.1.145'
    port = 9090
    remote = rpc.connect(host, port)

# upload the library to remote device and load it
# NOTE(review): load_module uses the literal name 'net.tar'; this assumes
# lib_fname (defined outside this view) has that basename -- confirm.
remote.upload(lib_fname)
rlib = remote.load_module('net.tar')

# Device context: remote.cpu(0) for the local demo, remote.cl(0) otherwise.
ctx = remote.cpu(0) if local_demo else remote.cl(0)
# upload the parameters: copy every entry of params into an array on ctx
rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}

# create the remote runtime module
module = runtime.create(graph, rlib, ctx)
# set parameter
module.set_input(**rparams)
# set input data
# NOTE(review): unlike rparams, this array is created without passing ctx.
module.set_input('data', tvm.nd.array(x.astype('float32')))
Esempio n. 23
0
def test_can_call_remote_function_with_rpc_proxy(host, port):
    """Call ``runtime.GetFFIString`` through a connection keyed by ``DEVICE_KEY`` and expect an echo."""
    session = rpc.connect(host, port, key=DEVICE_KEY)
    get_ffi_string = session.get_function("runtime.GetFFIString")
    greeting = "hello"
    assert get_ffi_string("hello") == greeting
# Otherwise, if target is 'sim', execute locally.

# Only targets other than "sim" and "tsim" need a remote session and an
# FPGA reconfiguration.
if env.TARGET not in ["sim", "tsim"]:

    # Get remote from tracker node if environment variable is set.
    # To set up the tracker, you'll need to follow the "Auto-tuning
    # a convolutional network for VTA" tutorial.
    tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
    tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
    # Otherwise if you have a device you want to program directly from
    # the host, make sure you've set the variables below to the IP of
    # your board.
    # Both values are read as strings; the port is converted with int()
    # at the point of use.
    device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99")
    device_port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
    # Connect directly to the device unless both tracker variables are set.
    if not tracker_host or not tracker_port:
        remote = rpc.connect(device_host, int(device_port))
    else:
        remote = autotvm.measure.request_remote(env.TARGET,
                                                tracker_host,
                                                int(tracker_port),
                                                timeout=10000)

    # Reconfigure the JIT runtime and FPGA.
    # You can program the FPGA with your own custom bitstream
    # by passing the path to the bitstream file instead of None.
    # The wall-clock time of both steps is measured and printed.
    reconfig_start = time.time()
    vta.reconfig_runtime(remote)
    vta.program_fpga(remote, bitstream=None)
    reconfig_time = time.time() - reconfig_start
    print(
        "Reconfigured FPGA and RPC runtime in {0:.2f}s!".format(reconfig_time))
Esempio n. 25
0
def test_rpc_return_func():
    """Fetch ``rpc.test.remote_func`` over RPC and check that the callable it returns adds its argument.

    The function is not registered in this block, so it must already be
    available under that name.
    """
    server = rpc.Server("localhost", key="x1")
    session = rpc.connect(server.host, server.port, key="x1")
    remote_factory = session.get_function("rpc.test.remote_func")
    add_ten = remote_factory(10)
    assert add_ten(12) == 22
Esempio n. 26
0
File: runner.py Progetto: wenxcs/tvm
def run_module(
    tvmc_package: TVMCPackage,
    device: str,
    hostname: Optional[str] = None,
    port: Union[int, str] = 9090,
    rpc_key: Optional[str] = None,
    inputs: Optional[Dict[str, np.ndarray]] = None,
    fill_mode: str = "random",
    repeat: int = 10,
    number: int = 10,
    profile: bool = False,
    end_to_end: bool = False,
    options: Optional[dict] = None,
):
    """Run a compiled graph executor module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    tvmc_package: TVMCPackage
        The compiled model package object that will be run.
    device: str,
        the device (e.g. "cpu" or "cuda") to be targeted by the RPC
        session, local or remote).
    hostname : str, optional
        The hostname of the target device on which to run.
    port : int or str, optional
        The port of the target device on which to run. A string value is
        converted with ``int()`` before connecting.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it
        will be assumed that remote points to a tracker.
    inputs : dict, optional
        A dictionary that maps input names to numpy values. If not provided,
        inputs will be generated using the fill_mode argument.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random".
        Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    number : int, optional
        The number of runs to measure within each repeat.
    profile : bool
        Whether to profile the run with the debug runtime.
    end_to_end : bool
        Whether to measure the time of memory copies as well as model
        execution. Turning this on can provide a more realistic estimate
        of how long running the model in production would take.
    options : dict, optional
        Project options forwarded to ``project.GeneratedProject.from_directory``.
        Only read when ``device`` is "micro".

    Returns
    -------
    result : TVMCResult
        Built from a dictionary of output tensors (keyed ``output_<i>``) and
        the execution times. The times are the value returned by
        ``module.benchmark`` or, for micro targets, an empty list.

    Raises
    ------
    TVMCException
        If ``tvmc_package`` is not a TVMCPackage, if the package format does
        not match the device, or if the micro session cannot be opened.
    """
    if not isinstance(tvmc_package, TVMCPackage):
        raise TVMCException(
            "This model doesn't seem to have been compiled yet. "
            "Try calling tvmc.compile on the model before running it.")

    with ExitStack() as stack:
        # Currently only two package formats are supported: "classic" and
        # "mlf". The latter can only be used for micro targets, i.e. with microTVM.
        if device == "micro":
            if tvmc_package.type != "mlf":
                raise TVMCException(
                    f"Model {tvmc_package.package_path} is not a MLF archive.")

            project_dir = get_project_dir(tvmc_package.project_dir)

            # This is guaranteed to work since project_dir was already checked when
            # building the dynamic parser to accommodate the project options, so no
            # checks are in place when calling GeneratedProject.
            project_ = project.GeneratedProject.from_directory(
                project_dir, options)
        elif tvmc_package.type == "mlf":
            raise TVMCException(
                "You're trying to run a model saved using the Model Library Format (MLF). "
                "MLF can only be used to run micro device ('--device micro')."
            )

        if hostname:
            if isinstance(port, str):
                port = int(port)
            # Remote RPC
            if rpc_key:
                logger.debug("Running on remote RPC tracker with key %s.",
                             rpc_key)
                session = request_remote(rpc_key, hostname, port, timeout=1000)
            else:
                logger.debug("Running on remote RPC with no key.")
                session = rpc.connect(hostname, port)
        elif device == "micro":
            # Remote RPC (running on a micro target)
            logger.debug("Running on remote RPC (micro target).")
            try:
                session = tvm.micro.Session(project_.transport())
                stack.enter_context(session)
            except Exception as err:
                # Catch Exception rather than everything so KeyboardInterrupt
                # and SystemExit still propagate, and keep the original cause
                # attached for debugging.
                raise TVMCException(
                    "Could not open a session with the micro target.") from err
        else:
            # Local
            logger.debug("Running a local session.")
            session = rpc.LocalSession()

        # Micro targets don't support uploading a model. The model to be run
        # must be already flashed into the micro target before one tries
        # to run it. Hence skip model upload for micro targets.
        if device != "micro":
            session.upload(tvmc_package.lib_path)
            lib = session.load_module(tvmc_package.lib_name)

        # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
        logger.debug("Device is %s.", device)
        if device == "cuda":
            dev = session.cuda()
        elif device == "cl":
            dev = session.cl()
        elif device == "metal":
            dev = session.metal()
        elif device == "vulkan":
            dev = session.vulkan()
        elif device == "rocm":
            dev = session.rocm()
        elif device == "micro":
            dev = session.device
            lib = session.get_system_lib()
        else:
            assert device == "cpu"
            dev = session.cpu()

        # TODO(gromero): Adjust for micro targets.
        if profile:
            logger.debug("Creating runtime with profiling enabled.")
            module = debug_executor.create(tvmc_package.graph,
                                           lib,
                                           dev,
                                           dump_root="./prof")
        else:
            if device == "micro":
                logger.debug(
                    "Creating runtime (micro) with profiling disabled.")
                module = tvm.micro.create_local_graph_executor(
                    tvmc_package.graph, lib, dev)
            else:
                logger.debug("Creating runtime with profiling disabled.")
                module = runtime.create(tvmc_package.graph, lib, dev)

        logger.debug("Loading params into the runtime module.")
        module.load_params(tvmc_package.params)

        logger.debug("Collecting graph input shape and type:")
        shape_dict, dtype_dict = module.get_input_info()
        logger.debug("Graph input shape: %s", shape_dict)
        logger.debug("Graph input type: %s", dtype_dict)

        inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs,
                                       fill_mode)

        logger.debug("Setting inputs to the module.")
        module.set_input(**inputs_dict)

        # Run must be called explicitly if profiling
        if profile:
            logger.info("Running the module with profiling enabled.")
            report = module.profile()
            # This print is intentional
            print(report)

        if device == "micro":
            # TODO(gromero): Fix time_evaluator() for micro targets. Once it's
            # fixed module.benchmark() can be used instead and this if/else can
            # be removed.
            module.run()
            times = []
        else:
            # Call the benchmarking function of the executor.
            # Optionally measure e2e data transfers from the
            # CPU to device memory overheads (e.g. PCIE
            # overheads if the device is a discrete GPU).
            if end_to_end:
                dev = session.cpu()
            times = module.benchmark(dev,
                                     number=number,
                                     repeat=repeat,
                                     end_to_end=end_to_end)

        logger.debug("Collecting the output tensors.")
        num_outputs = module.get_num_outputs()
        outputs = {}
        for i in range(num_outputs):
            output_name = "output_{}".format(i)
            outputs[output_name] = module.get_output(i).numpy()

        return TVMCResult(outputs, times)
Esempio n. 27
0
def run_module(
    module_file,
    device,
    hostname=None,
    port=9090,
    rpc_key=None,
    inputs=None,
    fill_mode="random",
    repeat=1,
    profile=False,
):
    """Run a compiled graph executor module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    module_file : str
        The path to the module file (a .tar file). The archive is expected
        to contain "mod.json", "mod.params" and "mod.so".
    device: str,
        the device (e.g. "cpu" or "gpu") to be targeted by the RPC
        session, local or remote).
    hostname : str, optional
        The hostname of the target device on which to run.
    port : int, optional
        The port of the target device on which to run.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it
        will be assumed that remote points to a tracker.
    inputs : dict, optional
        A dictionary that maps input names to numpy values.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random".
        Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    profile : bool
        Whether to profile the run with the debug runtime.

    Returns
    -------
    outputs : dict
        a dictionary with output tensors, generated by the module
        (keyed ``output_<i>``)
    times : list
        execution times generated by the time evaluator
        (``prof_result.results``)
    """

    with tempfile.TemporaryDirectory() as tmp_dir:
        logger.debug("extracting module file %s", module_file)
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only run module files that come from a trusted source.
        with tarfile.open(module_file) as archive:
            archive.extractall(tmp_dir)
        # Read both artifacts through context managers so the file handles
        # are closed before the temporary directory is removed.
        with open(os.path.join(tmp_dir, "mod.json")) as graph_file:
            graph = graph_file.read()
        with open(os.path.join(tmp_dir, "mod.params"), "rb") as params_file:
            params = bytearray(params_file.read())

        if hostname:
            # Remote RPC
            if rpc_key:
                logger.debug("running on remote RPC tracker with key %s",
                             rpc_key)
                session = request_remote(rpc_key, hostname, port, timeout=1000)
            else:
                logger.debug("running on remote RPC with no key")
                session = rpc.connect(hostname, port)
        else:
            # Local
            logger.debug("running a local session")
            session = rpc.LocalSession()

        session.upload(os.path.join(tmp_dir, "mod.so"))
        lib = session.load_module("mod.so")

        # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
        logger.debug("device is %s", device)
        if device == "gpu":
            dev = session.gpu()
        elif device == "cl":
            dev = session.cl()
        else:
            assert device == "cpu"
            dev = session.cpu()

        if profile:
            logger.debug("creating runtime with profiling enabled")
            module = debug_executor.create(graph, lib, dev, dump_root="./prof")
        else:
            logger.debug("creating runtime with profiling disabled")
            module = runtime.create(graph, lib, dev)

        logger.debug("load params into the runtime module")
        module.load_params(params)

        shape_dict, dtype_dict = get_input_info(graph, params)
        inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs,
                                       fill_mode)

        logger.debug("setting inputs to the module")
        module.set_input(**inputs_dict)

        # Run must be called explicitly if profiling
        if profile:
            logger.debug("running the module with profiling enabled")
            module.run()

        # create the module time evaluator (returns a function)
        timer = module.module.time_evaluator("run", dev, 1, repeat=repeat)
        # call the evaluator function to invoke the module and save execution times
        prof_result = timer()
        # collect a list of execution times from the profiling results
        times = prof_result.results

        logger.debug("collecting the output tensors")
        num_outputs = module.get_num_outputs()
        outputs = {}
        for i in range(num_outputs):
            output_name = "output_{}".format(i)
            outputs[output_name] = module.get_output(i).asnumpy()

        return outputs, times
def test_rpc_remote_module():
    """Build ``myadd`` (B = A + 1) and run it through several RPC sessions.

    The kernel is checked on a local session, on a client connected to
    server0 with session constructor arguments naming server1, and on a
    minrpc Popen session. Returns early when RPC is not enabled.
    """
    if not tvm.runtime.enabled("rpc"):
        return
    # Compute graph shared by all the nested checks below.
    n = tvm.runtime.convert(102)
    A = te.placeholder((n, ), name="A")
    B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
    sched = te.create_schedule(B.op)

    server0 = rpc.Server("localhost", key="x0")
    server1 = rpc.Server("localhost", key="x1")

    client = rpc.connect(
        server0.host,
        server0.port,
        key="x0",
        session_constructor_args=[
            "rpc.Connect", server1.host, server1.port, "x1"
        ],
    )

    def check_remote(remote):
        """Export the kernel as a shared library, upload it and verify it."""
        workdir = util.tempdir()
        dev = remote.cpu(0)
        built = tvm.build(sched, [A, B], "llvm", name="myadd")
        lib_path = workdir.relpath("dev_lib.so")
        built.export_library(lib_path)
        remote.upload(lib_path)
        loaded = remote.load_module("dev_lib.so")
        a = tvm.nd.array(np.random.uniform(size=102).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), dev)
        timer = loaded.time_evaluator(loaded.entry_name,
                                      remote.cpu(0),
                                      number=10)
        cost = timer(a, b).mean
        print("%g secs/op" % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    def check_minrpc():
        """Run the kernel through a minrpc executable started by PopenSession."""
        popen_session = tvm.get_global_func("rpc.PopenSession",
                                            allow_missing=True)
        if popen_session is None:
            return
        # Export the kernel as a minrpc executable.
        workdir = util.tempdir()
        built = tvm.build(sched, [A, B], "llvm --system-lib", name="myadd")
        minrpc_path = workdir.relpath("dev_lib.minrpc")
        built.export_library(minrpc_path,
                             rpc.with_minrpc(cc.create_executable))

        # A path that does not exist must be rejected.
        with pytest.raises(RuntimeError):
            rpc.PopenSession("filenotexist")

        # Start the minrpc session.
        remote = tvm.rpc.PopenSession(minrpc_path)
        dev = remote.cpu(0)
        system_lib = remote.system_lib()

        a = tvm.nd.array(np.random.uniform(size=102).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), dev)
        timer = system_lib.time_evaluator("myadd", remote.cpu(0), number=1)
        cost = timer(a, b).mean
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

        # Once the file is read-only (not executable) the session must fail.
        os.chmod(minrpc_path, stat.S_IRUSR)
        with pytest.raises(RuntimeError):
            rpc.PopenSession(minrpc_path)

    def check_remote_link_cl(remote):
        """Test function to run remote code such as cl

        This is not enabled because there is forking issue
        of TVM runtime when server launches after OpenCL
        runtime initializes. We leave it as an example
        on how to do rpc when we want to do linking on remote.
        """
        if not tvm.testing.device_enabled("opencl"):
            print("Skip because opencl is not enabled")
            return
        workdir = util.tempdir()
        dev = remote.cl(0)
        cl_sched = te.create_schedule(B.op)
        outer, inner = cl_sched[B].split(B.op.axis[0], factor=32)
        cl_sched[B].bind(outer, te.thread_axis("blockIdx.x"))
        cl_sched[B].bind(inner, te.thread_axis("threadIdx.x"))
        built = tvm.build(cl_sched, [A, B],
                          "opencl",
                          target_host="llvm",
                          name="myadd")
        # Option 1: save modules separately and rely on remote compiler
        obj_path = workdir.relpath("myadd.o")
        cl_path = workdir.relpath("myadd.cl")
        meta_path = workdir.relpath("myadd.tvm_meta.json")
        built.save(obj_path)
        built.imported_modules[0].save(cl_path)
        remote.upload(obj_path)
        remote.upload(cl_path)
        # The meta data file is uploaded as well.
        remote.upload(meta_path)
        host_mod = remote.load_module("myadd.o")
        device_mod = remote.load_module("myadd.cl")
        host_mod.import_module(device_mod)
        a = tvm.nd.array(np.random.uniform(size=102).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), dev)
        host_mod(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
        # Option 2: export library as a tar ball then handled by remote compiler
        tar_path = workdir.relpath("myadd.tar")
        built.export_library(tar_path)
        remote.upload(tar_path)
        host_mod = remote.load_module("myadd.tar")
        a = tvm.nd.array(np.random.uniform(size=102).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(102, dtype=A.dtype), dev)
        host_mod(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    check_remote(rpc.LocalSession())
    check_remote(client)
    check_minrpc()
Esempio n. 29
0
def test_rpc_return_func():
    """Check that ``rpc.test.add_to_lhs`` returns a callable over RPC.

    The remote function is called with 10; the callable it returns is
    expected to yield 22 when called with 12.
    """
    rpc_server = rpc.Server(key="x1")
    session = rpc.connect("127.0.0.1", rpc_server.port, key="x1")
    add_to_lhs = session.get_function("rpc.test.add_to_lhs")
    returned_func = add_to_lhs(10)
    result = returned_func(12)
    assert result == 22
Esempio n. 30
0
def deploy_rpc():
    """Compile the resnet18 model, ship it to an RPC server and run it there.

    The host part is built for Emscripten and converted to JavaScript, the
    device part is saved as GLSL; both are uploaded through the RPC session
    at localhost:9090 (key "js") and executed on the remote OpenGL device.
    The first 10 elements of the output are printed.
    """
    from tvm import rpc
    from tvm.contrib import util, emscripten

    # Load the resnet18 model, as in the other demos.
    net, params, data_shape, out_shape = load_mxnet_resnet()

    # Compile the model with a different host target this time: the host
    # library is going to be translated into JavaScript through Emscripten.
    graph, lib, params = compile_net(
        net,
        target_host="llvm -target=asmjs-unknown-emscripten -system-lib",
        target="opengl",
        data_shape=data_shape,
        params=params)

    # Deployment through RPC starts by preparing the module files locally.
    print("Saving the compiled module...")

    workdir = util.tempdir()
    bitcode_path = workdir.relpath("deploy.bc")  # host LLVM part
    js_path = workdir.relpath("deploy.js")  # host JavaScript part
    glsl_path = workdir.relpath("deploy.gl")  # device GLSL part
    meta_path = workdir.relpath("deploy.tvm_meta.json")

    lib.save(bitcode_path)
    emscripten.create_js(js_path, bitcode_path, side_module=True)
    lib.imported_modules[0].save(glsl_path)

    print("- Saved files:", workdir.listdir())

    # Open the session with the RPC server.
    print("Connecting to RPC server...")
    server_host = 'localhost'
    server_port = 9090
    session = rpc.connect(server_host, server_port, key="js")
    print("- Connected to RPC server!")

    # Push the module files to the RPC server.
    print("Uploading module to RPC server...")
    session.upload(js_path, "deploy.dso")
    session.upload(glsl_path)
    session.upload(meta_path)
    print("- Upload completed!")

    # Load both parts remotely and attach the device module to the host one.
    print("Loading remote library...")
    device_mod = session.load_module("deploy.gl")
    host_mod = session.load_module("deploy.dso")
    host_mod.import_module(device_mod)
    print("- Remote library loaded!")

    dev = session.opengl(0)

    # Copy the parameters to the remote device.
    print("Uploading parameters...")
    remote_params = {
        name: tvm.nd.array(value, dev) for name, value in params.items()
    }
    print("- Parameters uploaded!")

    # Create the remote runtime module.
    print("Running remote module...")
    from tvm.contrib import graph_runtime
    executor = graph_runtime.create(graph, host_mod, dev)

    # Feed the parameters.
    executor.set_input(**remote_params)

    # Feed random input data.
    sample = np.random.uniform(size=data_shape)
    executor.set_input('data', tvm.nd.array(sample.astype('float32')))

    # Execute.
    executor.run()
    print("- Remote module execution completed!")

    result = executor.get_output(0, out=tvm.nd.empty(out_shape, ctx=dev))
    # Print first 10 elements of output.
    print(result.asnumpy()[0][0:10])
Esempio n. 31
0
def reconfig_rpc_runtime():
    """Reconfig the RPC server runtime.

    Asserts that RPC support is enabled, connects to the server at the
    module-level ``host``/``port`` and reconfigures its runtime.
    """
    assert tvm.module.enabled("rpc")
    remote = rpc.connect(host, port)
    reconfig_runtime(remote)


# NOTE(review): the lines below were fused into reconfig_rpc_runtime with no
# `def` header, while the script that follows calls vector_add(n). The header
# is restored here so that call resolves — confirm against the original example.
def vector_add(n):
    """Declare an element-wise addition of two vectors of length ``n``.

    Returns
    -------
    A, B, C
        The two input placeholders ('a', 'b') and the compute 'c' with
        ``C[i] = A[i] + B[i]``.
    """
    A = tvm.placeholder((n, ), name='a')
    B = tvm.placeholder((n, ), name='b')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name='c')
    return A, B, C


# Declare, schedule and build the vector-add kernel.
A, B, C = vector_add(n)

s = tvm.create_schedule(C.op)

mod = tvm.build(s, [A, B, C])

# Export the built module so it can be uploaded to the session below.
mod_fname = 'vector-add.tar'
mod.export_library(mod_fname)

# '0.0.0.0' selects a local session; any other URL is treated as an RPC server.
if target_url != '0.0.0.0':
    remote = rpc.connect(url=target_url, port=target_port)
else:
    remote = rpc.LocalSession()
# Even when running a pretrained model on a remote machine, only `mod_fname`
# needs to be uploaded.
remote.upload(mod_fname)
remote_mod = remote.load_module(mod_fname)

ctx = remote.cpu()
a = tvm.nd.array(np.array([1, 2, 3], dtype=dtype), ctx=ctx)
b = tvm.nd.array(np.array([4, 5, 6], dtype=dtype), ctx=ctx)
# Allocate the output on the same context and with the same dtype as the
# inputs; the bare tvm.nd.empty(b.shape) used the default dtype and context
# instead of the session's.
c = tvm.nd.empty(b.shape, dtype=dtype, ctx=ctx)

remote_mod(a, b, c)
print(c.asnumpy())
Esempio n. 33
0
def run_module(
    tvmc_package: TVMCPackage,
    device: str,
    hostname: Optional[str] = None,
    port: Union[int, str] = 9090,
    rpc_key: Optional[str] = None,
    inputs: Optional[Dict[str, np.ndarray]] = None,
    fill_mode: str = "random",
    repeat: int = 10,
    number: int = 10,
    profile: bool = False,
):
    """Run a compiled graph executor module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    tvmc_package: TVMCPackage
        The compiled model package object that will be run.
    device: str,
        the device (e.g. "cpu" or "cuda") to be targeted by the RPC
        session, local or remote).
    hostname : str, optional
        The hostname of the target device on which to run.
    port : int or str, optional
        The port of the target device on which to run. A string value is
        converted with ``int()`` before connecting.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it
        will be assumed that remote points to a tracker.
    inputs : dict, optional
        A dictionary that maps input names to numpy values. If not provided,
        inputs will be generated using the fill_mode argument.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random".
        Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    number : int, optional
        The number of runs to measure within each repeat.
    profile : bool
        Whether to profile the run with the debug runtime.

    Returns
    -------
    result : TVMCResult
        Built from a dictionary of output tensors (keyed ``output_<i>``) and
        the execution times reported by the time evaluator
        (``prof_result.results``).

    Raises
    ------
    TVMCException
        If ``tvmc_package`` is not a TVMCPackage or is an MLF package.
    """
    if not isinstance(tvmc_package, TVMCPackage):
        raise TVMCException(
            "This model doesn't seem to have been compiled yet. "
            "Try calling tvmc.compile on the model before running it."
        )

    # Currently only two package formats are supported: "classic" and
    # "mlf". The latter can only be used for micro targets, i.e. with µTVM.
    if tvmc_package.type == "mlf":
        # The trailing space keeps the two sentences apart once the adjacent
        # string literals are concatenated.
        raise TVMCException(
            "You're trying to run a model saved using the Model Library Format (MLF). "
            "MLF can only be used to run micro targets (µTVM)."
        )

    if hostname:
        if isinstance(port, str):
            port = int(port)
        # Remote RPC
        if rpc_key:
            logger.debug("Running on remote RPC tracker with key %s.", rpc_key)
            session = request_remote(rpc_key, hostname, port, timeout=1000)
        else:
            logger.debug("Running on remote RPC with no key.")
            session = rpc.connect(hostname, port)
    else:
        # Local
        logger.debug("Running a local session.")
        session = rpc.LocalSession()

    session.upload(tvmc_package.lib_path)
    lib = session.load_module(tvmc_package.lib_name)

    # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
    logger.debug("Device is %s.", device)
    if device == "cuda":
        dev = session.cuda()
    elif device == "cl":
        dev = session.cl()
    else:
        assert device == "cpu"
        dev = session.cpu()

    if profile:
        logger.debug("Creating runtime with profiling enabled.")
        module = debug_executor.create(tvmc_package.graph, lib, dev, dump_root="./prof")
    else:
        logger.debug("Creating runtime with profiling disabled.")
        module = runtime.create(tvmc_package.graph, lib, dev)

    logger.debug("Loading params into the runtime module.")
    module.load_params(tvmc_package.params)

    shape_dict, dtype_dict = get_input_info(tvmc_package.graph, tvmc_package.params)
    inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs, fill_mode)

    logger.debug("Setting inputs to the module.")
    module.set_input(**inputs_dict)

    # Run must be called explicitly if profiling
    if profile:
        logger.info("Running the module with profiling enabled.")
        module.run()

    # create the module time evaluator (returns a function)
    timer = module.module.time_evaluator("run", dev, number=number, repeat=repeat)
    # call the evaluator function to invoke the module and save execution times
    prof_result = timer()
    # collect a list of execution times from the profiling results
    times = prof_result.results

    logger.debug("Collecting the output tensors.")
    num_outputs = module.get_num_outputs()
    outputs = {}
    for i in range(num_outputs):
        output_name = "output_{}".format(i)
        outputs[output_name] = module.get_output(i).numpy()

    return TVMCResult(outputs, times)
Esempio n. 34
0
def deploy_rpc():
    """Compile the resnet18 model, ship it to an RPC server and run it there.

    The host part is built for Emscripten and converted to JavaScript, the
    device part is saved as GLSL; both are uploaded through the RPC session
    at localhost:9090 (key "js") and executed on the remote OpenGL device.
    The first 10 elements of the output are printed.
    """
    from tvm import rpc
    from tvm.contrib import util, emscripten

    # Load the resnet18 model, as in the other demos.
    net, params, data_shape, out_shape = load_mxnet_resnet()

    # Compile the model with a different host target this time: the host
    # library is going to be translated into JavaScript through Emscripten.
    graph, lib, params = compile_net(
        net,
        target_host="llvm -target=asmjs-unknown-emscripten -system-lib",
        target="opengl",
        data_shape=data_shape,
        params=params)

    # Deployment through RPC starts by preparing the module files locally.
    print("Saving the compiled module...")

    workdir = util.tempdir()
    bitcode_path = workdir.relpath("deploy.bc")  # host LLVM part
    js_path = workdir.relpath("deploy.js")  # host JavaScript part
    glsl_path = workdir.relpath("deploy.gl")  # device GLSL part
    meta_path = workdir.relpath("deploy.tvm_meta.json")

    lib.save(bitcode_path)
    emscripten.create_js(js_path, bitcode_path, side_module=True)
    lib.imported_modules[0].save(glsl_path)

    print("- Saved files:", workdir.listdir())

    # Open the session with the RPC server.
    print("Connecting to RPC server...")
    server_host = 'localhost'
    server_port = 9090
    session = rpc.connect(server_host, server_port, key="js")
    print("- Connected to RPC server!")

    # Push the module files to the RPC server.
    print("Uploading module to RPC server...")
    session.upload(js_path, "deploy.dso")
    session.upload(glsl_path)
    session.upload(meta_path)
    print("- Upload completed!")

    # Load both parts remotely and attach the device module to the host one.
    print("Loading remote library...")
    device_mod = session.load_module("deploy.gl")
    host_mod = session.load_module("deploy.dso")
    host_mod.import_module(device_mod)
    print("- Remote library loaded!")

    dev = session.opengl(0)

    # Copy the parameters to the remote device.
    print("Uploading parameters...")
    remote_params = {
        name: tvm.nd.array(value, dev) for name, value in params.items()
    }
    print("- Parameters uploaded!")

    # Create the remote runtime module.
    print("Running remote module...")
    from tvm.contrib import graph_runtime
    executor = graph_runtime.create(graph, host_mod, dev)

    # Feed the parameters.
    executor.set_input(**remote_params)

    # Feed random input data.
    sample = np.random.uniform(size=data_shape)
    executor.set_input('data', tvm.nd.array(sample.astype('float32')))

    # Execute.
    executor.run()
    print("- Remote module execution completed!")

    result = executor.get_output(0, out=tvm.nd.empty(out_shape, ctx=dev))
    # Print first 10 elements of output.
    print(result.asnumpy()[0][0:10])
Esempio n. 35
0
def reconfig_rpc_runtime():
    """Connect to the RPC server at ``host``/``port`` and reconfigure its runtime.

    Asserts first that RPC support is enabled in the TVM runtime.
    """
    rpc_enabled = tvm.runtime.enabled("rpc")
    assert rpc_enabled
    session = rpc.connect(host, port)
    reconfig_runtime(session)
Esempio n. 36
0
from vta.testing import simulator

# The VTA parameters come from the vta/config/vta_config.json file
env = vta.get_env()

# The Pynq RPC host IP address and port number are taken from the OS
# environment, with defaults for when the variables are unset
host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99")
port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091"))

# In simulation mode, host the RPC server locally.
if env.TARGET == "sim":
    remote = rpc.LocalSession()

# On the Pynq, both the bitstream and the runtime system are configured
# to match the VTA configuration specified by the vta_config.json file.
elif env.TARGET == "pynq":

    # TVM must have been compiled with RPC=1
    assert tvm.module.enabled("rpc")
    remote = rpc.connect(host, port)

    # Reconfigure the JIT runtime
    vta.reconfig_runtime(remote)

    # Program the FPGA with a pre-compiled VTA bitstream.
    # Pass the path to a bitstream file instead of None to program
    # the FPGA with your own custom bitstream.
    vta.program_fpga(remote, bitstream=None)

######################################################################
# Computation Declaration
def test_rpc_remote_module():
    """Build ``myadd`` (B = A + 1) and run it on an RPC client and a local session.

    Returns early when RPC is not enabled.
    """
    if not tvm.module.enabled("rpc"):
        return
    server = rpc.Server("localhost")
    client = rpc.connect(server.host, server.port)
    # Compute graph shared by the nested checks below.
    n = tvm.convert(1024)
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    sched = tvm.create_schedule(B.op)

    def check_remote(remote):
        """Export the kernel as a shared library, upload it and verify it."""
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        workdir = util.tempdir()
        dev = remote.cpu(0)
        built = tvm.build(sched, [A, B], "llvm", name="myadd")
        lib_path = workdir.relpath("dev_lib.so")
        built.export_library(lib_path)
        remote.upload(lib_path)
        loaded = remote.load_module("dev_lib.so")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
        timer = loaded.time_evaluator(loaded.entry_name,
                                      remote.cpu(0),
                                      number=10)
        cost = timer(a, b).mean
        print('%g secs/op' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    def check_remote_link_cl(remote):
        """Test function to run remote code such as cl

        This is not enabled because there is forking issue
        of TVM runtime when server launches after OpenCL
        runtime initializes. We leave it as an example
        on how to do rpc when we want to do linking on remote.
        """
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        if not tvm.module.enabled("opencl"):
            print("Skip because opencl is not enabled")
            return
        workdir = util.tempdir()
        dev = remote.cl(0)
        cl_sched = tvm.create_schedule(B.op)
        outer, inner = cl_sched[B].split(B.op.axis[0], factor=32)
        cl_sched[B].bind(outer, tvm.thread_axis("blockIdx.x"))
        cl_sched[B].bind(inner, tvm.thread_axis("threadIdx.x"))
        built = tvm.build(cl_sched, [A, B],
                          "opencl",
                          target_host="llvm",
                          name="myadd")
        # Option 1: save modules separately and rely on remote compiler
        obj_path = workdir.relpath("myadd.o")
        cl_path = workdir.relpath("myadd.cl")
        meta_path = workdir.relpath("myadd.tvm_meta.json")
        built.save(obj_path)
        built.imported_modules[0].save(cl_path)
        remote.upload(obj_path)
        remote.upload(cl_path)
        # The meta data file is uploaded as well.
        remote.upload(meta_path)
        host_mod = remote.load_module("myadd.o")
        device_mod = remote.load_module("myadd.cl")
        host_mod.import_module(device_mod)
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
        host_mod(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
        # Option 2: export library as a tar ball then handled by remote compiler
        tar_path = workdir.relpath("myadd.tar")
        built.export_library(tar_path)
        remote.upload(tar_path)
        host_mod = remote.load_module("myadd.tar")
        a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)
        host_mod(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    check_remote(client)
    check_remote(rpc.LocalSession())
Esempio n. 38
0
def test_gemm_gpu(N, times, bn, num_block, num_thread):
    """Build a square GEMM for OpenCL, ship it over RPC, and evaluate it.

    The kernel computes ``C = A x B`` for ``N x N`` operands, where ``B`` is
    first repacked into a ``(N, N // bn, bn)`` layout.  The compiled module is
    exported as a shared library, uploaded through the RPC proxy, loaded on
    the remote side, and handed to ``evaluate``.

    Relies on the module-level names ``target``, ``key``, ``proxy_host``,
    ``proxy_port`` and ``evaluate``.

    Parameters
    ----------
    N : int
        Side length of the square matrices.
    times : int
        Forwarded unchanged to ``evaluate``
        (presumably the number of timed runs -- confirm against ``evaluate``).
    bn : int
        Width of the innermost packed axis of ``B``; must satisfy ``bn <= N``.
        The packed indexing only stays in bounds when ``N`` is a multiple
        of ``bn``.
    num_block : int
        ``nparts`` used for the block-level splits of ``C``.
    num_thread : int
        ``nparts`` used for the thread-level splits of ``C`` and ``packedB``.
    """
    assert bn <= N
    assert num_thread * num_thread * 16 <= N
    assert num_block * num_block * 2 <= N
    A = tvm.placeholder((N, N), name='A')
    B = tvm.placeholder((N, N), name='Btmp')
    k = tvm.reduce_axis((0, N), name='k')

    # Integer division is required here: under Python 3 ``N / bn`` is a float,
    # which is not a valid extent for a tensor dimension.
    packedB = tvm.compute((N, N // bn, bn),
                          lambda x, y, z: B[x, y * bn + z], name='B')

    # ``jj // bn`` / ``jj % bn`` select the packed column block and the offset
    # inside it; the index must be an integer quotient as well.
    C = tvm.compute(
        (N, N),
        lambda ii, jj: tvm.sum(A[ii, k] * packedB[k, jj // bn, jj % bn], axis=k),
        name='C')

    s = tvm.create_schedule(C.op)
    CC = s.cache_write(C, "local")

    block_x = tvm.thread_axis("blockIdx.x")
    block_y = tvm.thread_axis("blockIdx.y")
    thread_x = tvm.thread_axis("threadIdx.x")
    thread_y = tvm.thread_axis("threadIdx.y")

    thread_xz = tvm.thread_axis((0, 2), "vthread", name="vx")
    thread_yz = tvm.thread_axis((0, 2), "vthread", name="vy")

    # Schedule the packing stage: bind the outer parts of the first two axes
    # to threads and vectorize the innermost 8-wide chunk.
    pby, pbi = s[packedB].split(packedB.op.axis[0], nparts=num_thread)
    pbx, pbj = s[packedB].split(packedB.op.axis[1], nparts=num_thread)
    s[packedB].bind(pby, thread_y)
    s[packedB].bind(pbx, thread_x)
    pbz, pbk = s[packedB].split(packedB.op.axis[2], factor=8)
    s[packedB].vectorize(pbk)

    # Schedule the output stage: first-level split and binding.
    by, yi = s[C].split(C.op.axis[0], nparts=num_block)
    bx, xi = s[C].split(C.op.axis[1], nparts=num_thread)

    s[C].bind(by, block_y)
    s[C].bind(bx, thread_y)
    s[C].reorder(by, bx, yi, xi)

    # Second-level split: a factor-2 virtual-thread part per axis, then the
    # remaining block/thread parts.
    tyz, yi = s[C].split(yi, nparts=2)
    ty, yi = s[C].split(yi, nparts=num_block)
    txz, xi = s[C].split(xi, nparts=2)
    tx, xi = s[C].split(xi, nparts=num_thread)

    s[C].reorder(tyz, txz, ty, tx, yi, xi)
    s[C].bind(tyz, thread_yz)
    s[C].bind(txz, thread_xz)

    s[C].bind(ty, block_x)
    s[C].bind(tx, thread_x)

    xyi, xxi = s[C].split(xi, factor=8)
    s[C].reorder(tyz, txz, ty, tx, yi, xyi, xxi)
    s[C].vectorize(xxi)

    # Compute the local accumulator inside C's ``yi`` loop, with the reduction
    # axis outermost, the inner 8 columns vectorized and ``k`` unrolled by 2.
    s[CC].compute_at(s[C], yi)
    yo, xo = CC.op.axis
    s[CC].reorder(k, yo, xo)
    xo, xi = s[CC].split(xo, factor=8)
    s[CC].vectorize(xi)

    ko, ki = s[CC].split(k, factor=2)
    s[CC].unroll(ki)

    print(tvm.lower(s, [A, B, C], simple_mode=True))

    f = tvm.build(s, [A, B, C], "opencl", target_host=target, name="gemm_gpu")
    temp = util.tempdir()
    path_dso = temp.relpath("gemm_gpu.so")
    f.export_library(path_dso, ndk.create_shared)

    # connect to the proxy
    remote = rpc.connect(proxy_host, proxy_port, key=key)
    ctx = remote.cl(0)
    remote.upload(path_dso)
    f = remote.load_module("gemm_gpu.so")

    evaluate(f, ctx, N, times)
Esempio n. 39
0
# Element type of the model input, keyed by the input tensor name.
dtype_dict = {input_name: data.dtype}

# parse Caffe2 model and convert into Relay computation graph
from tvm import relay
mod, params = relay.frontend.from_caffe2(
    resnet50.init_net, resnet50.predict_net, shape_dict, dtype_dict)

# compile the model
target = 'metal'
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, target_host=target_host, params=params)

# Save the library and sign it.
temp = util.tempdir()
path_dso1 = temp.relpath("dev_lib.dylib")
lib.export_library(path_dso1, xcode.create_dylib,
                   arch=arch, sdk=sdk)
xcode.codesign(path_dso1)

# Start RPC test server that contains the compiled library.
server = xcode.popen_test_rpc(proxy_host, proxy_port, key,
                              destination=destination,
                              libs=[path_dso1])

# connect to the proxy
remote = rpc.connect(proxy_host, proxy_port, key=key)
ctx = remote.metal(0)
load_lib = remote.load_module("dev_lib.dylib")
# Pass the module that was just loaded; the previous code referenced an
# undefined name here, which raised NameError.
module = graph_runtime.create(graph, load_lib, ctx)
# NOTE(review): `loaded_params` is not defined in the visible part of this
# script -- confirm it holds the serialized parameter blob.
module.load_params(loaded_params)
# NOTE(review): `input_data` is not defined in the visible part of this script,
# and the keyword `data` must match the graph's input name -- confirm.
module.run(data=input_data)
# Outputs are read back with get_output() after run(), rather than taken
# from run()'s return value.
caffe2_out = module.get_output(0)