def test_rpc_tracker_register():
    """Registering one server with the tracker makes it appear as a free
    queue entry; requesting, releasing and terminating it are all reflected
    in the tracker summary."""
    # test registration
    tracker = Tracker('localhost', port=9000, port_end=10000)
    device_key = 'test_device'
    server = rpc.Server('localhost', port=9000, port_end=10000,
                        key=device_key,
                        tracker_addr=(tracker.host, tracker.port))
    time.sleep(1)  # give the server time to register with the tracker
    client = rpc.connect_tracker(tracker.host, tracker.port)
    summary = client.summary()
    assert summary['queue_info'][device_key]['free'] == 1
    remote = client.request(device_key)  # occupy the only server
    summary = client.summary()
    assert summary['queue_info'][device_key]['free'] == 0
    del remote  # dropping the session returns the server to the free queue
    time.sleep(1)
    summary = client.summary()
    assert summary['queue_info'][device_key]['free'] == 1
    server.terminate()  # a terminated server disappears from the queue
    time.sleep(1)
    summary = client.summary()
    assert summary['queue_info'][device_key]['free'] == 0
    tracker.terminate()
def test_rpc_tracker_register():
    """Server registration/release/termination is reflected in the tracker's
    free-queue count."""
    # test registration
    tracker = Tracker("localhost", port=9000, port_end=10000)
    device_key = "test_device"
    server = rpc.Server(
        "localhost",
        port=9000,
        port_end=10000,
        key=device_key,
        tracker_addr=(tracker.host, tracker.port),
    )
    time.sleep(1)  # allow the server to register with the tracker
    client = rpc.connect_tracker(tracker.host, tracker.port)
    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 1
    remote = client.request(device_key)  # occupy the only server
    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 0
    del remote  # releasing the session frees the server again
    time.sleep(1)
    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 1
    server.terminate()  # terminated server leaves the queue
    time.sleep(1)
    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 0
    tracker.terminate()
def request_remote(device_key, host=None, port=None, priority=1, timeout=60): """Request a remote session Parameters ---------- device_key: string The device key of registered device in tracker host: host, optional The host address of rpc tracker. If is none, will use environment variable "TVM_TRACKER_HOST" port: int, optional The port of rpc tracker. If is none, will use environment variable "TVM_TRACKER_PORT" priority: int, optional The priority of this request, larger is more prior timeout: float, optional The timeout of this session (units: second) Returns ------ session: RPCSession """ # connect to the tracker host = host or os.environ['TVM_TRACKER_HOST'] port = port or int(os.environ['TVM_TRACKER_PORT']) tracker = _rpc.connect_tracker(host, port) remote = tracker.request(device_key, priority=priority, session_timeout=timeout) return remote
def request_remote(device_key, host=None, port=None, priority=1, timeout=60):
    """Obtain a remote RPC session from the tracker.

    Parameters
    ----------
    device_key : str
        Key under which the target device registered itself with the tracker.
    host : Optional[str]
        Tracker host; falls back to the "TVM_TRACKER_HOST" environment
        variable when not given.
    port : Optional[int]
        Tracker port; falls back to the "TVM_TRACKER_PORT" environment
        variable when not given.
    priority : int = 1
        Request priority; a larger value is served first.
    timeout : int = 60
        Session timeout in seconds.

    Returns
    -------
    remote : RPCSession
        The connected remote RPCSession.
    """
    # Resolve tracker coordinates, preferring explicit arguments over the
    # environment (falsy values also trigger the environment fallback).
    tracker_host = host or os.environ["TVM_TRACKER_HOST"]
    tracker_port = port or int(os.environ["TVM_TRACKER_PORT"])
    return rpc.connect_tracker(tracker_host, tracker_port).request(
        device_key, priority=priority, session_timeout=timeout
    )
def rpc_sess(android_tracker_key, tvm_tracker_host, tvm_tracker_port):
    """Connect to the RPC tracker and request a session for the given
    Android device key (10-minute session timeout)."""
    from tvm import rpc

    return rpc.connect_tracker(tvm_tracker_host, tvm_tracker_port).request(
        android_tracker_key, priority=0, session_timeout=600
    )
def main():
    """Build the TTM kernel, run it on a remote micro device through the RPC
    tracker, and optionally check the result against a host reference.

    Relies on module-level globals: N, M, K, L, dtype, target,
    micro_device_config, rpc_server, rpc_port, device_key, min_input,
    max_input, verify, plus the TTM / TTM_rounding / compile_micro_mod
    helpers.
    """
    intrinsic_filename = 'kernel.c'
    # Hand the intrinsic C source and its include directory to the compiler.
    aux_sources = [f"{os.path.realpath(intrinsic_filename)}"]
    aux_options = [f"-I{os.path.dirname(os.path.realpath(intrinsic_filename))}"]
    s, bufs = TTM(N, M, K, L, dtype)
    print("Build function...")
    func = tvm.build(s, bufs, target=target)
    print(func.get_source())
    tmp_dir = util.tempdir()
    mod_path = tmp_dir.relpath('micro.obj')
    compile_micro_mod(mod_path, func, micro_device_config,
                      aux_sources=aux_sources, aux_options=aux_options)
    # Obtain a device session from the tracker and push the module over.
    tracker = rpc.connect_tracker(rpc_server, rpc_port)
    remote = tracker.request(device_key, priority=1, session_timeout=10000)
    print('Uploading', mod_path)
    remote.upload(mod_path)
    func = remote.load_module(os.path.split(mod_path)[1])
    ctx = remote.device(target, 0)
    time_f = func.time_evaluator(func.entry_name, ctx)
    print('Creating buffers on device')
    A_np = np.random.uniform(min_input, max_input, [N, M, K]).astype(dtype)
    B_np = np.random.uniform(min_input, max_input, [K, L]).astype(dtype)
    print("A input")
    print(A_np)
    print("B input")
    print(B_np)
    # copy=verify — presumably device copies are only needed when the result
    # will be checked; TODO confirm against tvm.nd.array semantics.
    a = tvm.nd.array(A_np, ctx, copy=verify)
    b = tvm.nd.array(B_np, ctx, copy=verify)
    c = tvm.nd.empty([N, M, L], dtype, ctx)
    print('Running function on device')
    costs = time_f(a, b, c).results
    print('Best result cost:', costs)
    if verify:
        print('Checking correctness...')
        golden = TTM_rounding(A_np, B_np)
        print("C=")
        print(c.asnumpy())
        print("golden=")
        print(golden)
        tvm.testing.assert_allclose(c.asnumpy(), golden)
        print('Micro correctness check passed.')
    else:
        print('Micro correctness check skipped.')
def test_rpc_module():
    """Build an elementwise add kernel for both OpenCL and CPU, run both on a
    tracker-provided remote device (via ndk-built shared libs) and check the
    results. Uses module globals: target, tracker_host, tracker_port, key."""
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n, ), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    temp = util.tempdir()
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
    # Build the dynamic lib.
    # If we don't want to do metal and only use cpu, just set target to be target
    f = tvm.build(s, [A, B], "opencl", target_host=target, name="myadd")
    path_dso1 = temp.relpath("dev_lib2.so")
    f.export_library(path_dso1, ndk.create_shared)
    # Second schedule: plain parallel CPU version of the same compute.
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].parallel(xi)
    s[B].pragma(xo, "parallel_launch_point")
    s[B].pragma(xi, "parallel_barrier_when_finish")
    f = tvm.build(s, [A, B], target, name="myadd_cpu")
    path_dso2 = temp.relpath("cpu_lib.so")
    f.export_library(path_dso2, ndk.create_shared)
    tracker = rpc.connect_tracker(tracker_host, tracker_port)
    remote = tracker.request(key, priority=0, session_timeout=60)
    print('Run CPU test ...')
    ctx = remote.cpu(0)
    remote.upload(path_dso2)
    f2 = remote.load_module("cpu_lib.so")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f2.time_evaluator(f2.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    print('Run GPU test ...')
    ctx = remote.cl(0)
    remote.upload(path_dso1)
    f1 = remote.load_module("dev_lib2.so")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
def test_rpc_tracker_request():
    """Two concurrent requests against a single server: the second request
    stays pending until the first holder process is killed."""
    # test concurrent request
    tracker = Tracker("localhost", port=9000, port_end=10000)
    device_key = "test_device"
    server = rpc.Server(
        "localhost",
        port=9000,
        port_end=10000,
        key=device_key,
        tracker_addr=(tracker.host, tracker.port),
    )
    client = rpc.connect_tracker(tracker.host, tracker.port)

    def target(host, port, device_key, timeout):
        # Worker process: grab the device and hold it forever; the busy loop
        # keeps the session alive until the process is terminated.
        client = rpc.connect_tracker(host, port)
        remote = client.request(device_key, session_timeout=timeout)
        while True:
            pass
        remote.cpu()  # unreachable; keeps `remote` referenced

    proc1 = multiprocessing.Process(target=target,
                                    args=(tracker.host, tracker.port, device_key, 4))
    proc2 = multiprocessing.Process(target=target,
                                    args=(tracker.host, tracker.port, device_key, 200))
    proc1.start()
    time.sleep(0.5)
    proc2.start()
    time.sleep(0.5)
    summary = client.summary()
    # proc1 holds the server, proc2 is queued.
    assert summary["queue_info"][device_key]["free"] == 0
    assert summary["queue_info"][device_key]["pending"] == 1
    proc1.terminate()
    proc1.join()
    time.sleep(0.5)
    summary = client.summary()
    # After proc1 dies, proc2's pending request is served.
    assert summary["queue_info"][device_key]["free"] == 0
    assert summary["queue_info"][device_key]["pending"] == 0
    proc2.terminate()
    proc2.join()
    server.terminate()
    tracker.terminate()
def get_remote(self):
    """Open a remote session according to ``self.use_rpc``.

    Returns
    -------
    remote : RPCSession or None
        A tracker-mediated session when ``use_rpc == "tracker"``, a direct
        server session when ``use_rpc == "server"``, otherwise ``None``.
    """
    remote = None
    if self.use_rpc == "tracker":
        tracker = rpc.connect_tracker(self.host, self.port)
        # str.startswith replaces the old ``find(...) == 0`` idiom.
        if self.device_key.startswith("android"):
            # Set up ADB port forwarding and launch the TVM RPC app on the
            # phone before requesting the device from the tracker.
            cmds = [
                "adb reverse tcp:9190 tcp:9190",
                "adb forward tcp:5001 tcp:5001",
                "adb shell am start -n org.apache.tvm.tvmrpc/org.apache.tvm.tvmrpc.MainActivity 1> /dev/null 2> /dev/null",
            ]
            os.system("; ".join(cmds))
        remote = tracker.request(self.device_key, session_timeout=self.sess_timeout)
    elif self.use_rpc == "server":
        remote = rpc.connect(self.host, self.port, session_timeout=self.sess_timeout)
    return remote
def test_tasks(target):
    """Build each tuned task with the best autotvm history, upload it to a
    tracker-provided remote device and report mean/std inference time.

    Uses module globals: tracker_host, tracker_port, device_key, log_file,
    task_name, tuning_option, config_funcs, tempdir.
    """
    tracker = rpc.connect_tracker(tracker_host, tracker_port)
    remote = tracker.request(device_key)
    ctx = remote.cpu()
    with autotvm.apply_history_best(log_file):
        with tvm.target.create(target):
            print("Build funcs...")
            funcs = config_funcs(task_name, ctx)
            print("Run...")
            for i, f in enumerate(funcs):
                func = f[0]       # built function
                tensors = f[1]    # its input tensors
                print("Export...")
                tmp = tempdir()
                if tuning_option['measure_option']['build_func'] == 'ndk':  # for android
                    from tvm.contrib import ndk
                    filename = func.entry_name + str(i) + ".so"
                    path = tmp.relpath(filename)
                    func.export_library(path, ndk.create_shared)
                else:
                    filename = func.entry_name + str(i) + ".tar"
                    path = tmp.relpath(filename)
                    func.export_library(path)
                # upload module to device
                print("Upload...")
                remote.upload(path)
                rlib = remote.load_module(filename)
                # evaluate
                print("Evaluate inference time cost...")
                ftimer = rlib.time_evaluator(rlib.entry_name, ctx, number=1, repeat=100)
                prof_res = np.array(ftimer(*tensors).results)
                print("Task %d Mean inference time (std dev): %f s (%f s)" %
                      (i, np.mean(prof_res), np.std(prof_res)))
def test_rpc_tracker_request():
    """Concurrent request test: with one server, a second request queues as
    pending and is served once the first holder process is terminated."""
    # test concurrent request
    tracker = Tracker('localhost', port=9000, port_end=10000)
    device_key = 'test_device'
    server = rpc.Server('localhost', port=9000, port_end=10000,
                        key=device_key,
                        tracker_addr=(tracker.host, tracker.port))
    client = rpc.connect_tracker(tracker.host, tracker.port)

    def target(host, port, device_key, timeout):
        # Worker process: acquire the device and hold it forever.
        client = rpc.connect_tracker(host, port)
        remote = client.request(device_key, session_timeout=timeout)
        while True:
            pass
        remote.cpu()  # unreachable; keeps `remote` referenced

    proc1 = multiprocessing.Process(target=target,
                                    args=(tracker.host, tracker.port, device_key, 4))
    proc2 = multiprocessing.Process(target=target,
                                    args=(tracker.host, tracker.port, device_key, 200))
    proc1.start()
    time.sleep(0.5)
    proc2.start()
    time.sleep(0.5)
    summary = client.summary()
    # proc1 occupies the server, proc2 waits.
    assert summary['queue_info'][device_key]['free'] == 0
    assert summary['queue_info'][device_key]['pending'] == 1
    proc1.terminate()
    proc1.join()
    time.sleep(0.5)
    summary = client.summary()
    # proc2's pending request has been served.
    assert summary['queue_info'][device_key]['free'] == 0
    assert summary['queue_info'][device_key]['pending'] == 0
    proc2.terminate()
    proc2.join()
    server.terminate()
    tracker.terminate()
def test_rpc_tracker_request():
    """Concurrent request test using the module-level ``_target`` holder
    process: the second request stays pending until the first holder dies."""
    # test concurrent request
    tracker = Tracker(port=9000, port_end=10000)
    device_key = "test_device"
    server = rpc.Server(
        port=9000,
        port_end=10000,
        key=device_key,
        tracker_addr=("127.0.0.1", tracker.port),
    )
    client = rpc.connect_tracker("127.0.0.1", tracker.port)
    proc1 = multiprocessing.Process(target=_target,
                                    args=("127.0.0.1", tracker.port, device_key, 4))
    proc2 = multiprocessing.Process(target=_target,
                                    args=("127.0.0.1", tracker.port, device_key, 200))
    proc1.start()
    time.sleep(0.5)
    proc2.start()
    time.sleep(0.5)
    summary = client.summary()
    # proc1 holds the single server; proc2 is queued.
    assert summary["queue_info"][device_key]["free"] == 0
    assert summary["queue_info"][device_key]["pending"] == 1
    proc1.terminate()
    proc1.join()
    time.sleep(0.5)
    summary = client.summary()
    # proc2 has been served after proc1's death.
    assert summary["queue_info"][device_key]["free"] == 0
    assert summary["queue_info"][device_key]["pending"] == 0
    proc2.terminate()
    proc2.join()
    server.terminate()
    tracker.terminate()
def test_rpc_tracker_via_proxy():
    """Two servers register with the tracker through a proxy; the client can
    request both device sessions.

           tracker
          /       \\
    Host -- Proxy -- RPC server
    """
    device_key = "test_device"
    tracker_server = Tracker(port=9000, port_end=9100)
    proxy_server = Proxy(
        host=tracker_server.host,
        port=8888,
        port_end=8988,
        tracker_addr=(tracker_server.host, tracker_server.port),
    )
    # Both servers connect through the proxy (is_proxy=True) rather than
    # listening on their own ports.
    server1 = rpc.Server(
        host=proxy_server.host,
        port=proxy_server.port,
        key=device_key,
        tracker_addr=(tracker_server.host, tracker_server.port),
        is_proxy=True,
    )
    server2 = rpc.Server(
        host=proxy_server.host,
        port=proxy_server.port,
        key=device_key,
        tracker_addr=(tracker_server.host, tracker_server.port),
        is_proxy=True,
    )
    client = rpc.connect_tracker(tracker_server.host, tracker_server.port)
    remote1 = client.request(device_key, session_timeout=30)  # pylint: disable=unused-variable
    remote2 = client.request(device_key, session_timeout=30)  # pylint: disable=unused-variable
    server2.terminate()
    server1.terminate()
    proxy_server.terminate()
    tracker_server.terminate()
def __enter__(self):
    """Context-manager entry: request a Hexagon session from the tracker
    (once) and cache the resulting device handle on ``self.device``.

    Returns
    -------
    self
    """
    if self.device:
        # Already initialized — reuse the existing session.
        return self
    tracker = _rpc.connect_tracker(self._remote_kw["host"], self._remote_kw["port"])
    # Any RuntimeError simply propagates; the previous
    # ``except RuntimeError as exception: raise exception`` was a no-op.
    self._rpc = tracker.request(
        self._remote_kw["key"],
        priority=self._remote_kw["priority"],
        session_timeout=self._remote_kw["timeout"],
        session_constructor_args=[
            "tvm.contrib.hexagon.create_hexagon_session",
            self._session_name,
            self._remote_stack_size_bytes,
        ],
    )
    self.device = self._rpc.hexagon(0)
    return self
def __enter__(self):
    """Context-manager entry: lazily request a Hexagon session from the RPC
    tracker and cache it on ``self._rpc``.

    Returns
    -------
    self
    """
    if self._rpc:
        # Already initialized — reuse the existing session.
        return self
    tracker = _rpc.connect_tracker(self._remote_kw["host"], self._remote_kw["port"])
    # Any RuntimeError simply propagates; the previous
    # ``except RuntimeError as exception: raise exception`` was a no-op.
    self._rpc = tracker.request(
        self._remote_kw["key"],
        priority=self._remote_kw["priority"],
        session_timeout=self._remote_kw["timeout"],
        session_constructor_args=[
            "tvm.contrib.hexagon.create_hexagon_session",
            self._session_name,
            self._remote_stack_size_bytes,
            # Extra simulator arguments come from the environment; empty when
            # unset.
            os.environ.get("HEXAGON_SIM_ARGS", ""),
            self._rpc_receive_buffer_size_bytes,
        ],
    )
    return self
def test_run_gtests(gtest_args):
    """Run the OpenCL C++ gtests through the packed function
    "opencl.run_gtests" — remotely when the tracker env vars are set,
    locally otherwise — and assert a zero exit code.
    """
    if (
        "TVM_TRACKER_HOST" in os.environ
        and "TVM_TRACKER_PORT" in os.environ
        and "TVM_TRACKER_KEY" in os.environ
    ):
        rpc_tracker_host = os.environ["TVM_TRACKER_HOST"]
        rpc_tracker_port = int(os.environ["TVM_TRACKER_PORT"])
        rpc_key = os.environ["TVM_TRACKER_KEY"]
        tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
        rpc_connection = tracker.request(rpc_key, priority=0, session_timeout=600)
    else:
        rpc_connection = rpc.LocalSession()
    try:
        func = rpc_connection.get_function("opencl.run_gtests")
    except Exception:
        # Narrowed from a bare ``except:`` which would also swallow
        # KeyboardInterrupt/SystemExit; the error is still re-raised.
        print(
            "This test requires TVM Runtime to be built with a OpenCL gtest version using OpenCL API cmake flag -DUSE_OPENCL_GTEST=/path/to/opencl/googletest/gtest"
        )
        raise
    gtest_error_code = func(gtest_args)
    np.testing.assert_equal(gtest_error_code, 0)
def target(host, port, device_key, timeout):
    """Worker-process body: acquire the device from the tracker and hold the
    session forever (until the process is terminated from outside)."""
    client = rpc.connect_tracker(host, port)
    remote = client.request(device_key, session_timeout=timeout)
    while True:
        pass
    remote.cpu()  # unreachable; keeps `remote` referenced so the session stays alive
def _target(host, port, device_key, timeout):
    """Worker-process body: request the device and hold the session until the
    process is killed."""
    client = rpc.connect_tracker(host, port)
    remote = client.request(device_key, session_timeout=timeout)
    while True:
        pass
    remote.cpu()  # unreachable; keeps `remote` referenced so the session stays alive
def _connect():
    # Rebind the enclosing scope's ``tracker`` to a fresh connection to the
    # tracker at self.tracker_host:self.tracker_port.
    nonlocal tracker
    tracker = rpc.connect_tracker(self.tracker_host, self.tracker_port)
def build_run_compare(tvm_mod,
                      params1,
                      input_shape,
                      dtype="float32",
                      target="llvm",
                      gpu_preprocess=None):
    """Build ``tvm_mod``, run it on host OpenCL or on a remote Android device
    (when TVM_TRACKER_HOST/PORT are set) and compare against the CPU
    reference produced by ``get_cpu_reference``.

    ``gpu_preprocess`` is an optional transform applied to the module before
    building.
    """
    if "TVM_TRACKER_HOST" in os.environ and "TVM_TRACKER_PORT" in os.environ:
        rpc_tracker_host = os.environ["TVM_TRACKER_HOST"]
        rpc_tracker_port = os.environ["TVM_TRACKER_PORT"]
        run_on_host = 0
        target_host = "llvm -mtriple=arm64-linux-android"
        rpc_tracker_port = int(rpc_tracker_port)
    else:
        run_on_host = 1
        target_host = "llvm"
    if gpu_preprocess:
        tvm_mod_nchwc = gpu_preprocess(tvm_mod)
    else:
        tvm_mod_nchwc = tvm_mod
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(tvm_mod_nchwc,
                                         target_host=target_host,
                                         target=target,
                                         params=params1)
    if run_on_host:
        ctx = tvm.opencl()
        m = graph_runtime.create(graph, lib, ctx)
    else:
        from tvm import rpc
        from tvm.contrib import utils, ndk
        # Build for Android, push the shared lib to the device and load it.
        rpc_key = "android"
        tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
        remote = tracker.request(rpc_key, priority=0, session_timeout=600)
        temp = utils.tempdir()
        dso_binary = "dev_lib_cl.so"
        dso_binary_path = temp.relpath(dso_binary)
        ctx = remote.cl(0)
        lib.export_library(dso_binary_path, ndk.create_shared)
        remote.upload(dso_binary_path)
        rlib = remote.load_module(dso_binary)
        m = graph_runtime.create(graph, rlib, ctx)
    m.set_input(**params)
    inputs = []
    # Feed random data: dict input_shape maps input names to shapes;
    # otherwise the single input is named "data".
    if isinstance(input_shape, dict):
        for key in input_shape:
            inputs.append(
                np.random.normal(size=input_shape[key]).astype(dtype))
            m.set_input(key, inputs[-1])
    else:
        inputs.append(np.random.normal(size=input_shape).astype(dtype))
        m.set_input("data", inputs[-1])
    m.run()
    ref_outputs = get_cpu_reference(tvm_mod, params1, input_shape, inputs)
    for i, ref_output in enumerate(ref_outputs):
        tvm_output = m.get_output(i)
        output = tvm_output.asnumpy()
        # for index, x in np.ndenumerate(ref_output):
        #     if abs(output[index] - x) > 0.01:
        #         print(index, output[index], x)
        np.testing.assert_allclose(output, ref_output, rtol=1e-1, atol=1e-1)
def test_rpc_tracker_register():
    """Registration test with two servers, one advertising an unreachable
    custom address; checks both the free-queue counts and the advertised
    addresses in the tracker's server_info."""
    # test registration
    tracker = Tracker(port=9000, port_end=10000)
    device_key = "test_device"
    server1 = rpc.Server(
        host="127.0.0.1",
        port=9000,
        port_end=10000,
        key=device_key,
        tracker_addr=("127.0.0.1", tracker.port),
    )
    server2 = rpc.Server(
        host="127.0.0.1",
        port=9000,
        port_end=10000,
        key=device_key,
        tracker_addr=("127.0.0.1", tracker.port),
        custom_addr="test_addr",  # this is a test address, which is unable to connect
    )
    time.sleep(1)  # allow both servers to register
    client = rpc.connect_tracker("127.0.0.1", tracker.port)

    def exist_address(summary, key, host, port):
        # True iff a server entry for ``key`` advertises (host, port);
        # host=None matches any host.
        server_info = summary["server_info"]
        for device in server_info:
            if device["key"] == "server:%s" % key:
                addr = device["addr"]
                if (host is None or host == addr[0]) and port == addr[1]:
                    return True
        return False

    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 2
    assert exist_address(summary, device_key, "127.0.0.1", server1.port)
    assert exist_address(summary, device_key, "test_addr", server2.port)
    remote = client.request(device_key)  # occupy one of the two servers
    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 1
    del remote  # release it again
    time.sleep(1)
    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 2
    server1.terminate()
    time.sleep(1)
    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 1
    assert not exist_address(summary, device_key, "127.0.0.1", server1.port)
    assert exist_address(summary, device_key, "test_addr", server2.port)
    server2.terminate()
    time.sleep(1)
    summary = client.summary()
    assert summary["queue_info"][device_key]["free"] == 0
    assert not exist_address(summary, device_key, "test_addr", server2.port)
    tracker.terminate()
def build_run_compare(tvm_mod,
                      params1,
                      input_shape,
                      dtype="float32",
                      target="llvm",
                      ):
    """Convert the module layout to NCHW4c/OIHW4o, build it, run it on host
    OpenCL or a remote Android device (via the RPC tracker) and compare the
    outputs against ``get_reference``.

    Parameters
    ----------
    tvm_mod : relay expression/module to build.
    params1 : parameter dict passed to ``relay.build``.
    input_shape : tuple, or dict mapping input names to shapes.
    dtype : str
        dtype of the random test inputs.
    target : str
        Build target.
    """
    # Use .get() so a missing TVM_TRACKER_HOST falls back to running on the
    # host; the previous os.environ[...] lookup raised KeyError and made the
    # host-fallback branch unreachable.
    rpc_tracker_host = os.environ.get("TVM_TRACKER_HOST")
    rpc_tracker_port = os.environ.get("TVM_TRACKER_PORT")
    if rpc_tracker_host:
        run_on_host = 0
        target_host = "llvm -mtriple=arm64-linux-android"
        rpc_tracker_port = int(rpc_tracker_port)
    else:
        run_on_host = 1
        target_host = "llvm"
    # Convert conv2d layouts before building.
    layout_config = relay.transform.LayoutConfig()
    desired_layouts = {"nn.conv2d": ["NCHW4c", "OIHW4o"]}
    with layout_config:
        seq = tvm.transform.Sequential(
            [relay.transform.ConvertLayout(desired_layouts)])
        with tvm.transform.PassContext(opt_level=3):
            tvm_mod_nchwc = seq(tvm.IRModule.from_expr(tvm_mod))
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(tvm_mod_nchwc,
                                         target_host=target_host,
                                         target=target,
                                         params=params1)
    if run_on_host:
        ctx = tvm.opencl()
        m = graph_runtime.create(graph, lib, ctx)
    else:
        from tvm import rpc
        from tvm.contrib import utils, ndk
        # Request an Android device, push the ndk-built lib and load it.
        rpc_key = "android"
        tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
        remote = tracker.request(rpc_key, priority=0, session_timeout=600)
        temp = utils.tempdir()
        dso_binary = "dev_lib_cl.so"
        dso_binary_path = temp.relpath(dso_binary)
        ctx = remote.cl(0)
        lib.export_library(dso_binary_path, ndk.create_shared)
        remote.upload(dso_binary_path)
        rlib = remote.load_module(dso_binary)
        m = graph_runtime.create(graph, rlib, ctx)
    m.set_input(**params)
    inputs = []
    # Random inputs: dict input_shape maps names to shapes; otherwise the
    # single input is named "data".
    if isinstance(input_shape, dict):
        for key in input_shape:
            inputs.append(
                np.random.normal(size=input_shape[key]).astype(dtype))
            m.set_input(key, inputs[-1])
    else:
        inputs.append(np.random.normal(size=input_shape).astype(dtype))
        m.set_input("data", inputs[-1])
    m.run()
    ref_outputs = get_reference(tvm_mod, params1, input_shape, inputs)
    for i, ref_output in enumerate(ref_outputs):
        tvm_output = m.get_output(i)
        output = tvm_output.asnumpy()
        np.testing.assert_allclose(output, ref_output, rtol=1e-2, atol=1e-2)
def check_server_drop():
    """Test tracker fault tolerance when servers drop, requests target
    unknown keys, or stale worker entries are pushed into the queue."""
    try:
        from tvm.rpc import tracker, proxy, base
        from tvm.rpc.base import TrackerCode

        @tvm.register_func("rpc.test2.addone")
        def addone(x):
            # Trivial remote function used to probe that a session works.
            return x + 1

        def _put(tclient, value):
            # Send a raw tracker protocol message and consume the reply.
            base.sendjson(tclient._sock, value)
            base.recvjson(tclient._sock)

        tserver = tracker.Tracker("localhost", 8888)
        tproxy = proxy.Proxy("localhost", 8881,
                             tracker_addr=("localhost", tserver.port))
        tclient = rpc.connect_tracker("localhost", tserver.port)
        server0 = rpc.Server(
            "localhost", port=9099,
            tracker_addr=("localhost", tserver.port), key="abc")
        server1 = rpc.Server(
            "localhost", port=9099,
            tracker_addr=("localhost", tserver.port), key="xyz")
        server2 = rpc.Server(
            "localhost", tproxy.port, is_proxy=True, key="xyz")
        server3 = rpc.Server(
            "localhost", tproxy.port, is_proxy=True, key="xyz1")

        # Fault tolerance to un-handled requested value
        _put(tclient, [TrackerCode.REQUEST, "abc", "", 1])
        _put(tclient, [TrackerCode.REQUEST, "xyz1", "", 1])
        # Fault tolerance to stale worker value
        _put(tclient, [TrackerCode.PUT, "xyz", (server1.port, "abc")])
        _put(tclient, [TrackerCode.PUT, "xyz", (server1.port, "abcxxx")])
        _put(tclient, [TrackerCode.PUT, "xyz", (tproxy.port, "abcxxx11")])

        # Fault tolerance to server timeout
        def check_timeout(timeout, sleeptime):
            def myfunc(remote):
                time.sleep(sleeptime)
                f1 = remote.get_function("rpc.test2.addone")
                assert f1(10) == 11
            try:
                tclient.request_and_run("xyz", myfunc, session_timeout=timeout)
            except RuntimeError:
                # Expected when sleeptime exceeds the session timeout.
                pass
            print(tclient.text_summary())
            try:
                remote = tclient.request("xyz", priority=0,
                                         session_timeout=timeout)
                remote2 = tclient.request("xyz", session_timeout=timeout)
                time.sleep(sleeptime)
                f1 = remote.get_function("rpc.test2.addone")
                assert f1(10) == 11
                f1 = remote2.get_function("rpc.test2.addone")
                assert f1(10) == 11
            except tvm.TVMError as e:
                # Session may legitimately time out; the tracker must survive.
                pass
            # Healthy keys must still be servable after the failures above.
            remote3 = tclient.request("abc")
            f1 = remote3.get_function("rpc.test2.addone")
            remote3 = tclient.request("xyz1")
            f1 = remote3.get_function("rpc.test2.addone")
            assert f1(10) == 11

        check_timeout(0.01, 0.1)
        check_timeout(2, 0)
        tserver.terminate()
        server0.terminate()
        server1.terminate()
        server2.terminate()
        server3.terminate()
        tproxy.terminate()
    except ImportError:
        print("Skip because tornado is not available")
def build_run_compare(
    tvm_mod,
    params1,
    input_shape,
    dtype="float32",
    target="llvm",
    static_mem_scopes=None,
    gpu_preprocess=None,
    stat_file=None,
):
    """Build ``tvm_mod`` (optionally with an autotvm log), check the graph's
    storage scopes, run on host OpenCL or a remote Android device via the RPC
    tracker, and compare outputs against ``get_cpu_reference``.

    Parameters
    ----------
    tvm_mod : relay module to build.
    params1 : parameter dict passed to ``relay.build``.
    input_shape : tuple, or dict mapping input names to shapes.
    dtype : str
        dtype of the random test inputs.
    target : str
        Build target.
    static_mem_scopes : list of str, optional
        Expected ``storage_scope`` entries in the graph JSON. ``None``
        (the default) behaves like an empty list; the former mutable
        default ``[]`` was replaced with a ``None`` sentinel.
    gpu_preprocess : callable, optional
        Transform applied to ``tvm_mod`` before building.
    stat_file : str, optional
        autotvm tuning log to apply during the build.

    Returns
    -------
    graph : str
        The serialized graph JSON produced by ``relay.build``.
    """
    if static_mem_scopes is None:
        static_mem_scopes = []
    if "TVM_TRACKER_HOST" in os.environ and "TVM_TRACKER_PORT" in os.environ:
        rpc_tracker_host = os.environ["TVM_TRACKER_HOST"]
        rpc_tracker_port = int(os.environ["TVM_TRACKER_PORT"])
        run_on_host = 0
        target_host = "llvm -mtriple=arm64-linux-android"
    else:
        run_on_host = 1
        target_host = "llvm"
    if gpu_preprocess:
        tvm_mod_nchwc = gpu_preprocess(tvm_mod)
    else:
        tvm_mod_nchwc = tvm_mod
    if stat_file is not None:
        with autotvm.apply_history_best(stat_file):
            with tvm.transform.PassContext(opt_level=3):
                graph, lib, params = relay.build(
                    tvm_mod_nchwc, target_host=target_host, target=target, params=params1
                )
    else:
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build(
                tvm_mod_nchwc, target_host=target_host, target=target, params=params1
            )
    # verification that storage_scope has expected textures scopes
    graph_json = json.loads(graph)
    if "storage_scope" in graph_json["attrs"]:
        assert (
            len(static_mem_scopes) == len(graph_json["attrs"]["storage_scope"][1])
            or len(static_mem_scopes) == 0
        )
    else:
        assert len(static_mem_scopes) == 0
    for i in range(0, len(static_mem_scopes)):
        assert static_mem_scopes[i] == graph_json["attrs"]["storage_scope"][1][i]
    if run_on_host:
        ctx = tvm.opencl()
        m = graph_runtime.create(graph, lib, ctx)
    else:
        from tvm import rpc
        from tvm.contrib import utils, ndk

        # Request an Android device, push the ndk-built lib and load it.
        rpc_key = "android"
        tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
        remote = tracker.request(rpc_key, priority=0, session_timeout=600)
        temp = utils.tempdir()
        dso_binary = "dev_lib_cl.so"
        dso_binary_path = temp.relpath(dso_binary)
        ctx = remote.cl(0)
        lib.export_library(dso_binary_path, ndk.create_shared)
        remote.upload(dso_binary_path)
        rlib = remote.load_module(dso_binary)
        m = graph_runtime.create(graph, rlib, ctx)
    m.set_input(**params)
    inputs = []
    # Random inputs: dict input_shape maps names to shapes; otherwise the
    # single input is named "data".
    if isinstance(input_shape, dict):
        for key in input_shape:
            inputs.append(np.random.normal(size=input_shape[key]).astype(dtype))
            m.set_input(key, inputs[-1])
    else:
        inputs.append(np.random.normal(size=input_shape).astype(dtype))
        m.set_input("data", inputs[-1])
    m.run()
    ref_outputs = get_cpu_reference(tvm_mod, params1, input_shape, inputs)
    for i, ref_output in enumerate(ref_outputs):
        tvm_output = m.get_output(i)
        output = tvm_output.asnumpy()
        np.testing.assert_allclose(output, ref_output, rtol=1e-1, atol=1e-1)
    return graph
# Convert the Keras model to a relay module, build it for the target and run
# it on a remote Android phone ("RedmiK30") via the RPC tracker.
shape_dict = {'input_1': data.shape}
mod, params = relay.frontend.from_keras(keras_model, shape_dict)  # keras -> tvm.module
print(type(mod))
# build graph and params
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target=target, target_host=target_host, params=params)
# NOTE(review): hard-coded local path — adjust for other machines.
lib.export_library("/Users/liuyuanqiang/Desktop/net.so", ndk.create_shared)  # ndk
# rpc tracker
tracker = rpc.connect_tracker("0.0.0.0", 9190)
remote = tracker.request("RedmiK30", priority=0, session_timeout=60)
if target == "opencl":
    ctx = remote.cl(0)
else:
    ctx = remote.cpu(0)
remote.upload("/Users/liuyuanqiang/Desktop/net.so")
rlib = remote.load_module('net.so')
# run
start = time.time()
module = graph_runtime.create(graph, rlib, ctx)
module.set_input(**params)
module.set_input('input_1', tvm.nd.array(data.astype('float32')))
module.run()
tvm_out = module.get_output(0)
def test_rpc_module():
    """Build an elementwise add kernel for CPU and, optionally, OpenCL and
    Vulkan; run each flavor on a tracker-provided remote device and verify
    the results. Uses module globals: target, tracker_host, tracker_port,
    key, test_opencl, test_vulkan."""
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    temp = util.tempdir()
    # Establish remote connection with target hardware
    tracker = rpc.connect_tracker(tracker_host, tracker_port)
    remote = tracker.request(key, priority=0, session_timeout=60)
    # Compile the Graph for CPU target
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].parallel(xi)
    s[B].pragma(xo, "parallel_launch_point")
    s[B].pragma(xi, "parallel_barrier_when_finish")
    f = tvm.build(s, [A, B], target, name="myadd_cpu")
    path_dso_cpu = temp.relpath("cpu_lib.so")
    f.export_library(path_dso_cpu, ndk.create_shared)
    # Execute the portable graph on cpu target
    print('Run CPU test ...')
    ctx = remote.cpu(0)
    remote.upload(path_dso_cpu)
    f2 = remote.load_module("cpu_lib.so")
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f2.time_evaluator(f2.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op\n' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    # Compile the Graph for OpenCL target
    if test_opencl:
        s = tvm.create_schedule(B.op)
        xo, xi = s[B].split(B.op.axis[0], factor=64)
        s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
        s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
        # Build the dynamic lib.
        # If we don't want to do metal and only use cpu, just set target to be target
        f = tvm.build(s, [A, B], "opencl", target_host=target, name="myadd")
        path_dso_cl = temp.relpath("dev_lib_cl.so")
        f.export_library(path_dso_cl, ndk.create_shared)
        print('Run GPU(OpenCL Flavor) test ...')
        ctx = remote.cl(0)
        remote.upload(path_dso_cl)
        f1 = remote.load_module("dev_lib_cl.so")
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
        cost = time_f(a, b).mean
        print('%g secs/op\n' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    # Compile the Graph for Vulkan target
    if test_vulkan:
        s = tvm.create_schedule(B.op)
        xo, xi = s[B].split(B.op.axis[0], factor=64)
        s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
        s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
        # Build the dynamic lib.
        # If we don't want to do metal and only use cpu, just set target to be target
        f = tvm.build(s, [A, B], "vulkan", target_host=target, name="myadd")
        path_dso_vulkan = temp.relpath("dev_lib_vulkan.so")
        f.export_library(path_dso_vulkan, ndk.create_shared)
        print('Run GPU(Vulkan Flavor) test ...')
        ctx = remote.vulkan(0)
        remote.upload(path_dso_vulkan)
        f1 = remote.load_module("dev_lib_vulkan.so")
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
        cost = time_f(a, b).mean
        print('%g secs/op\n' % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
def test_rpc_module():
    """te-API variant: build an elementwise add kernel for CPU and,
    optionally, OpenCL and Vulkan; run each flavor on a tracker-provided
    remote device and verify results. Uses module globals: target,
    tracker_host, tracker_port, key, test_opencl, test_vulkan."""
    # graph
    n = tvm.runtime.convert(1024)
    A = te.placeholder((n, ), name="A")
    B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    temp = util.tempdir()
    # Establish remote connection with target hardware
    tracker = rpc.connect_tracker(tracker_host, tracker_port)
    remote = tracker.request(key, priority=0, session_timeout=60)
    # Compile the Graph for CPU target
    s = te.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].parallel(xi)
    s[B].pragma(xo, "parallel_launch_point")
    s[B].pragma(xi, "parallel_barrier_when_finish")
    f = tvm.build(s, [A, B], target, name="myadd_cpu")
    path_dso_cpu = temp.relpath("cpu_lib.so")
    f.export_library(path_dso_cpu, ndk.create_shared)
    # Execute the portable graph on cpu target
    print("Run CPU test ...")
    ctx = remote.cpu(0)
    remote.upload(path_dso_cpu)
    f2 = remote.load_module("cpu_lib.so")
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f2.time_evaluator(f2.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print("%g secs/op\n" % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    # Compile the Graph for OpenCL target
    if test_opencl:
        s = te.create_schedule(B.op)
        xo, xi = s[B].split(B.op.axis[0], factor=64)
        s[B].bind(xi, te.thread_axis("threadIdx.x"))
        s[B].bind(xo, te.thread_axis("blockIdx.x"))
        # Build the dynamic lib.
        # If we don't want to do metal and only use cpu, just set target to be target
        f = tvm.build(s, [A, B], "opencl", target_host=target, name="myadd")
        path_dso_cl = temp.relpath("dev_lib_cl.so")
        f.export_library(path_dso_cl, ndk.create_shared)
        print("Run GPU(OpenCL Flavor) test ...")
        ctx = remote.cl(0)
        remote.upload(path_dso_cl)
        f1 = remote.load_module("dev_lib_cl.so")
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
        cost = time_f(a, b).mean
        print("%g secs/op\n" % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    # Compile the Graph for Vulkan target
    if test_vulkan:
        s = te.create_schedule(B.op)
        xo, xi = s[B].split(B.op.axis[0], factor=64)
        s[B].bind(xi, te.thread_axis("threadIdx.x"))
        s[B].bind(xo, te.thread_axis("blockIdx.x"))
        # Build the dynamic lib.
        # If we don't want to do metal and only use cpu, just set target to be target
        f = tvm.build(s, [A, B], "vulkan", target_host=target, name="myadd")
        path_dso_vulkan = temp.relpath("dev_lib_vulkan.so")
        f.export_library(path_dso_vulkan, ndk.create_shared)
        print("Run GPU(Vulkan Flavor) test ...")
        ctx = remote.vulkan(0)
        remote.upload(path_dso_vulkan)
        f1 = remote.load_module("dev_lib_vulkan.so")
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
        time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
        cost = time_f(a, b).mean
        print("%g secs/op\n" % cost)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
lib.export_library(lib_fname, fcompile) ###################################################################### # Deploy the Model Remotely by RPC # --------------------------------------------- # With RPC, you can deploy the model remotely from your host machine # to the remote android device. tracker_host = os.environ.get('TVM_TRACKER_HOST', '0.0.0.0') tracker_port = int(os.environ.get('TVM_TRACKER_PORT', 9190)) key = 'android' if local_demo: remote = rpc.LocalSession() else: tracker = rpc.connect_tracker(tracker_host, tracker_port) # When running a heavy model, we should increase the `session_timeout` remote = tracker.request(key, priority=0, session_timeout=60) if local_demo: ctx = remote.cpu(0) elif test_target == 'opencl': ctx = remote.cl(0) elif test_target == 'vulkan': ctx = remote.vulkan(0) else: ctx = remote.cpu(0) # upload the library to remote device and load it remote.upload(lib_fname)
lib.export_library(lib_fname, fcompile) ###################################################################### # Deploy the Model Remotely by RPC # --------------------------------------------- # With RPC, you can deploy the model remotely from your host machine # to the remote android device. tracker_host = os.environ.get('TVM_TRACKER_HOST', '0.0.0.0') tracker_port = int(os.environ.get('TVM_TRACKER_PORT', 9190)) key = 'android' if local_demo: remote = rpc.LocalSession() else: tracker = rpc.connect_tracker(tracker_host, tracker_port) # When running a heavy model, we should increase the `session_timeout` remote = tracker.request(key, priority=0, session_timeout=60) if local_demo: ctx = remote.cpu(0) elif test_target == 'opencl': ctx = remote.cl(0) elif test_target == 'vulkan': ctx = remote.vulkan(0) else: ctx = remote.cpu(0) # upload the library to remote device and load it remote.upload(lib_fname)