def verify(target="llvm", algorithm=nnpack.ConvolutionAlgorithm.AUTO, with_bias=True): if not tvm.module.enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True): print("skip because extern function is not available") return if not nnpack.is_available(): return ctx = tvm.cpu(0) transformed_kernel = nnpack.convolution_inference_weight_transform( kernel, algorithm=algorithm) output = nnpack.convolution_inference_without_weight_transform( data, transformed_kernel, bias if with_bias else None, [PAD, PAD, PAD, PAD], [STRIDE, STRIDE], algorithm=algorithm) s = tvm.create_schedule(output.op) f = tvm.build(s, [data, kernel, bias, output], target) na = np.random.uniform(size=dshape).astype(data.dtype) nb = np.random.uniform(size=kshape).astype(kernel.dtype) nc = np.random.uniform(size=bshape).astype(bias.dtype) if with_bias else np.zeros(bshape, dtype=bias.dtype) ta = tvm.nd.array(na, ctx) tb = tvm.nd.array(nb, ctx) tc = tvm.nd.array(nc, ctx) td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), ctx) f(ta, tb, tc, td) nd = np_conv(np.reshape(na, (BATCH, IC, IH, IW)), nb, PAD, STRIDE) + nc.reshape(1, bshape[0], 1, 1) tvm.testing.assert_allclose( td.asnumpy(), nd.reshape(BATCH, IC, IH, IW), rtol=1e-5)
def check(factor):
    s = tvm.create_schedule(z.op)
    xo, xi = s[z].split(z.op.axis[0], factor=factor)
    vadd = intrin_vadd(factor)
    s[z].tensorize(xi, vadd)
    s = s.normalize()
    dom_map = tvm.schedule.InferBound(s)
    finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
    out_dom, in_dom = finfer(s[z], dom_map)
    assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].extent, factor)
    assert tvm.ir_pass.Equal(out_dom[z.op.axis[0]].min, xo * factor)
    assert tvm.ir_pass.Equal(in_dom.items()[0][1][0].extent, factor)
    fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
    body = fmatch(s[z], out_dom, in_dom, vadd)
    assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
                             tvm.ir_pass.CanonicalSimplify(vadd.op.body[0]))
    stmt = tvm.schedule.ScheduleOps(s, dom_map)
    tvm.lower(s, [x, y, z])

def test_conv2d():
    in_channel = 3
    out_channel = 64
    filter_h = 3
    filter_w = 3
    pad_h = 1
    pad_w = 1
    stride_h = 1
    stride_w = 1
    dilation_h = 1
    dilation_w = 1

    xshape = [1, in_channel, 128, 128]
    if not tvm.module.enabled("rocm"):
        print("skip because rocm is not enabled...")
        return
    if not tvm.get_global_func("tvm.contrib.miopen.conv2d.setup", True):
        print("skip because miopen is not enabled...")
        return
    wshape = (out_channel, in_channel, filter_h, filter_w)

    X = tvm.placeholder(xshape, name='X')
    W = tvm.placeholder(wshape, name='W')
    Y = miopen.conv2d_forward(X, W,
                              stride_h, stride_w,
                              pad_h, pad_w,
                              dilation_h, dilation_w,
                              conv_mode=0)

    yshape = [x.value for x in Y.shape]
    import topi
    with tvm.target.create("rocm -libs=miopen"):
        s = topi.generic.schedule_extern(Y)

    def verify():
        ctx = tvm.rocm(0)
        f = tvm.build(s, [X, W, Y], "rocm", target_host="llvm", name="conv2d")
        x = tvm.nd.array(np.random.uniform(-1, 1, xshape).astype(np.float32), ctx)
        w = tvm.nd.array(np.random.uniform(-1, 1, wshape).astype(np.float32), ctx)
        y = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), ctx)
        f(x, w, y)

        Y_ref = topi.nn.conv2d_nchw(X, W, (stride_h, stride_w), (pad_h, pad_w))
        with tvm.target.rocm():
            s_ref = topi.generic.schedule_conv2d_nchw([Y_ref])
        f_ref = tvm.build(s_ref, [X, W, Y_ref], "rocm")
        y_ref = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), ctx)
        f_ref(x, w, y_ref)
        print("Max abs diff:", np.max(np.abs(y.asnumpy() - y_ref.asnumpy())))
        tvm.testing.assert_allclose(y.asnumpy(), y_ref.asnumpy(), atol=1e-3)

    verify()

def verify(A, B, C, target="llvm"): if not tvm.get_global_func("tvm.contrib.mps.conv2d", True): print("skip because extern function is not available") return ctx = tvm.metal(0) f = tvm.build(s1, [A, B, C], "metal") a = tvm.nd.array(np.random.uniform(size=(n, h, w, ci)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(co, kh, kw, ci)).astype(B.dtype), ctx) c = tvm.nd.array(np.zeros((n, h // stride, w // stride, co), dtype=C.dtype), ctx) f(a, b, c)
def stats():
    """Get current profiler statistics

    Returns
    -------
    stats : dict
        Current profiler statistics
    """
    x = tvm.get_global_func("vta.simulator.profiler_status")()
    return json.loads(x)

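# A minimal usage sketch for stats() above, assuming the VTA simulator module is
# importable as vta.testing.simulator and a workload has already been run through
# it; the counter names in the returned dict depend on the simulator build.
from vta.testing import simulator

if simulator.enabled():
    simulator.clear_stats()
    # ... run a VTA workload here ...
    for counter, value in simulator.stats().items():
        print(counter, value)
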
def check(factor):
    s = tvm.create_schedule(C.op)
    x, y = C.op.axis
    yo, yi = s[C].split(y, factor=factor)
    gemv = intrin_gemv(factor, l)
    s[C].tensorize(yi, gemv)
    s = s.normalize()
    dom_map = tvm.schedule.InferBound(s)
    finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
    out_dom, in_dom = finfer(s[C], dom_map)
    assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
    assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
    assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
    fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
    body = fmatch(s[C], out_dom, in_dom, gemv)
    assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
                             tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
    stmt = tvm.schedule.ScheduleOps(s, dom_map)
    tvm.lower(s, [A, B, C])

def verify(A, B, D, s, target="metal"): if not tvm.get_global_func("tvm.contrib.mps.matmul", True): print("skip because extern function is not available") return ctx = tvm.metal(0) f = tvm.build(s, [A, B, D], "metal") a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx) c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx) f(a, b, c) tvm.testing.assert_allclose( c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)
def test_get_global():
    targs = (10, 10.0, "hello")

    # register into global function table
    @tvm.register_func
    def my_packed_func(*args):
        assert(tuple(args) == targs)
        return 10

    # get it out from global function table
    f = tvm.get_global_func("my_packed_func")
    assert isinstance(f, tvm.Function)
    y = f(*targs)
    assert y == 10

def test_conv2d():
    in_channel = 3
    out_channel = 32
    filter_h = 3
    filter_w = 3
    pad_h = 1
    pad_w = 1
    stride_h = 1
    stride_w = 1
    dilation_h = 1
    dilation_w = 1

    xshape = [4, 3, 32, 32]
    if not tvm.module.enabled("cuda"):
        print("skip because cuda is not enabled...")
        return
    if not tvm.get_global_func("tvm.contrib.cudnn.conv2d.output_shape", True):
        print("skip because cudnn is not enabled...")
        return
    wshape = cudnn.conv2d_w_shape(in_channel, out_channel, filter_h, filter_w)

    X = tvm.placeholder(xshape, name='X')
    W = tvm.placeholder(wshape, name='W')
    Y = cudnn.conv2d_forward(X, W,
                             stride_h, stride_w,
                             pad_h, pad_w,
                             dilation_h, dilation_w,
                             conv_mode=1,
                             tensor_format=0,
                             algo=1)
    yshape = [x.value for x in Y.shape]
    s = tvm.create_schedule(Y.op)

    def verify():
        ctx = tvm.gpu(0)
        f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv2d")
        x = tvm.nd.array(np.random.uniform(-1, 1, xshape).astype(np.float32), ctx)
        w = tvm.nd.array(np.random.uniform(-1, 1, wshape).astype(np.float32), ctx)
        y = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), ctx)
        f(x, w, y)

    verify()

def verify(target="llvm"): if not tvm.module.enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func("tvm.contrib.random.normal", True): print("skip because extern function is not available") return ctx = tvm.cpu(0) f = tvm.build(s, [A], target) a = tvm.nd.array(np.zeros((m, n), dtype=A.dtype), ctx) f(a) na = a.asnumpy() assert abs(np.mean(na) - 3) < 1e-2 assert abs(np.std(na) - 4) < 1e-2
def check_rfactor_no_reset_multi_reduction(factor, rfactor):
    s = tvm.create_schedule(C.op)
    x, y = C.op.axis
    rk = C.op.reduce_axis[0]
    yo, yi = s[C].split(y, factor=factor)
    ro, ri = s[C].split(rk, factor=rfactor)
    roo, roi = s[C].split(ro, factor=2)
    s[C].reorder(yo, roo, roi, yi, ri)
    gemv = intrin_gemv_no_reset(factor, rfactor)
    s[C].tensorize(yi, gemv)
    s = s.normalize()
    dom_map = tvm.schedule.InferBound(s)
    finfer = tvm.get_global_func("test.op.InferTensorizeRegion")
    out_dom, in_dom = finfer(s[C], dom_map)
    assert tvm.ir_pass.Equal(out_dom[x].extent, 1)
    assert tvm.ir_pass.Equal(out_dom[y].extent, factor)
    assert tvm.ir_pass.Equal(out_dom[y].min, yo * factor)
    fmatch = tvm.get_global_func("test.op.MatchTensorizeBody")
    body = fmatch(s[C], out_dom, in_dom, gemv)
    assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(body[0]),
                             tvm.ir_pass.CanonicalSimplify(gemv.op.body[0]))
    stmt = tvm.schedule.ScheduleOps(s, dom_map)
    tvm.lower(s, [A, B, C])

def verify(target="rocm"): if not tvm.module.enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func("tvm.contrib.rocblas.matmul", True): print("skip because extern function is not available") return ctx = tvm.rocm(0) f = tvm.build(s, [A, B, C], target) a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx) c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx) f(a, b, c) tvm.testing.assert_allclose( c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-5)
def verify(target="llvm"): if not tvm.module.enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func("tvm.contrib.cblas.matmul", True): print("skip because extern function is not avalable") return ctx = tvm.cpu(0) f = tvm.build(s, [A, B, D, bias], target) a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx) d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx) bb = 10.0 f(a, b, d, bb) np.testing.assert_allclose( d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + bb, rtol=1e-5)
def verify(target="rocm"): if not tvm.testing.device_enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func(lib.__name__ + ".batch_matmul", True): print("skip because extern function is not available") return dev = tvm.rocm(0) f = tvm.build(s, [A, B, C], target) a = tvm.nd.array(np.random.uniform(size=ashape).astype(A.dtype), dev) b = tvm.nd.array(np.random.uniform(size=bshape).astype(B.dtype), dev) c = tvm.nd.array(np.zeros((batch, m, n), dtype=C.dtype), dev) f(a, b, c) tvm.testing.assert_allclose(c.numpy(), get_numpy(a.numpy(), b.numpy(), transa, transb), rtol=1e-5)
def test_create_array_buffer_info(): target = Target("c") global_ws_pool = usmp_utils.PoolInfo( pool_name="global_workspace", target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS}, ) fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo") tir_mod = LinearStructure tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target) tir_mod = _assign_poolinfos_to_allocates_in_irmodule( tir_mod, [global_ws_pool]) main_func = tir_mod["tvmgen_default_run_model"] buffer_info_map = tvm.tir.usmp.analysis.extract_buffer_info( main_func, tir_mod) buffer_info_array = fcreate_array_bi(buffer_info_map) for buffer_info in buffer_info_array: assert buffer_info in buffer_info_map.keys()
def verify(A, B, D, s, bias, target="llvm"): if not tvm.module.enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func("tvm.contrib.mps.matmul", True): print("skip because extern function is not avalable") return ctx = tvm.cpu(0) f = tvm.build(s, [A, B, D, bias], target) a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx) d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx) bb = 10.0 f(a, b, d, bb) np.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + bb, rtol=1e-5)
def test_relu():
    """Test a subgraph with a single ReLU operator."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    shape = (1, 32, 14, 14)

    def gen_relu():
        data0 = relay.var("data0", shape=shape, dtype=dtype)
        out = relay.nn.relu(data0)

        func = relay.Function([data0], out)
        func = set_func_attr(func, "dnnl", "dnnl_0")
        glb_var = relay.GlobalVar("dnnl_0")
        mod = tvm.IRModule()
        mod[glb_var] = func
        mod = transform.InferType()(mod)

        data0 = relay.var("data0", shape=shape, dtype=dtype)
        main_f = relay.Function([data0], glb_var(data0))
        mod["main"] = main_f
        mod = transform.InferType()(mod)

        data0 = relay.var("data0", shape=shape, dtype=dtype)
        out = relay.nn.relu(data0)
        main_f = relay.Function([data0], out)
        ref_mod = tvm.IRModule()
        ref_mod["main"] = main_f
        ref_mod = transform.InferType()(ref_mod)

        return mod, ref_mod

    mod, ref_mod = gen_relu()

    data0 = np.random.uniform(-1, 1, shape).astype(dtype)
    check_result(
        mod,
        ref_mod,
        {
            "data0": data0,
        },
        (1, 32, 14, 14),
        tol=1e-5,
    )

def init(hw_backend):
    """Init hardware and software shared library for accelerator

    Parameters
    ----------
    hw_backend : str
        Hardware backend can be verilog or chisel
    """
    cur_path = osp.dirname(osp.abspath(osp.expanduser(__file__)))
    hw_libname = "libhw" + get_ext()
    if hw_backend in ("verilog", "chisel"):
        hw_lib = osp.join(cur_path, "..", "hardware", hw_backend, "build", hw_libname)
    load_sw()
    m = tvm.runtime.load_module(hw_lib, "vta-tsim")
    f = tvm.get_global_func("tvm.vta.tsim.init")
    f(m)

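# A minimal usage sketch for init() above, assuming the tsim hardware library was
# built beforehand under hardware/<backend>/build; the TSIM_BACKEND environment
# variable name is illustrative, not part of the original API.
import os

hw_backend = os.environ.get("TSIM_BACKEND", "verilog")
init(hw_backend)
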
def verify(target="llvm"): if not tvm.runtime.enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func(lib.__name__ + ".matmul", True): print("skip because extern function is not available") return ctx = tvm.cpu(0) f = tvm.build(s, [A, B, D], target) a = tvm.nd.array(np.random.uniform(size=ashape).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=bshape).astype(B.dtype), ctx) d = tvm.nd.array(np.zeros((batch, n, m), dtype=D.dtype), ctx) f(a, b, d) tvm.testing.assert_allclose(d.asnumpy(), get_numpy(a.asnumpy(), b.asnumpy(), transa, transb), rtol=1e-5)
def test_remote():
    if not tvm.runtime.enabled("tflite"):
        print("skip because tflite runtime is not enabled...")
        return
    if not tvm.get_global_func("tvm.tflite_runtime.create", True):
        print("skip because tflite runtime is not enabled...")
        return
    try:
        import tensorflow as tf
    except ImportError:
        print('skip because tensorflow not installed...')
        return

    tflite_fname = "model.tflite"
    tflite_model = _create_tflite_model()
    temp = util.tempdir()
    tflite_model_path = temp.relpath(tflite_fname)
    open(tflite_model_path, 'wb').write(tflite_model)

    # inference via tflite interpreter python apis
    interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    input_shape = input_details[0]['shape']
    tflite_input = np.array(np.random.random_sample(input_shape), dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], tflite_input)
    interpreter.invoke()
    tflite_output = interpreter.get_tensor(output_details[0]['index'])

    # inference via remote tvm tflite runtime
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    ctx = remote.cpu(0)
    a = remote.upload(tflite_model_path)

    with open(tflite_model_path, 'rb') as model_fin:
        runtime = tflite_runtime.create(model_fin.read(), remote.cpu(0))
        runtime.set_input(0, tvm.nd.array(tflite_input, remote.cpu(0)))
        runtime.invoke()
        out = runtime.get_output(0)
        np.testing.assert_equal(out.asnumpy(), tflite_output)

    server.terminate()

def verify(target="cuda"): if not tvm.get_global_func("tvm.contrib.cublas.matmul", True): print("skip because extern function is not available") return ctx = tvm.gpu(0) f = tvm.build(s, [A, B, C], target) a = tvm.nd.array( np.random.uniform(size=(j, n, l)).astype(A.dtype), ctx) b = tvm.nd.array( np.random.uniform(size=(j, l, m)).astype(B.dtype), ctx) c = tvm.nd.array(np.zeros((j, n, m), dtype=C.dtype), ctx) f(a, b, c) tvm.testing.assert_allclose(c.asnumpy(), np.matmul(a.asnumpy().astype(C.dtype), b.asnumpy().astype( C.dtype)).astype(C.dtype), rtol=rtol)
def get_input_info(graph_str, params):
    """Return the 'shape' and 'dtype' dictionaries for the input
    tensors of a compiled module.

    .. note::
        We can't simply get the input tensors from a TVM graph
        because weight tensors are treated equivalently. Therefore, to
        find the input tensors we look at the 'arg_nodes' in the graph
        (which are either weights or inputs) and check which ones don't
        appear in the params (where the weights are stored). These nodes
        are therefore inferred to be input tensors.

    Parameters
    ----------
    graph_str : str
        JSON graph of the module serialized as a string.
    params : bytearray
        Params serialized as a bytearray.

    Returns
    -------
    shape_dict : dict
        Shape dictionary - {input_name: tuple}.
    dtype_dict : dict
        dtype dictionary - {input_name: dtype}.
    """
    shape_dict = {}
    dtype_dict = {}
    # Use a special function to load the binary params back into a dict
    load_arr = tvm.get_global_func("tvm.relay._load_param_dict")(params)
    param_names = [v.name for v in load_arr]
    graph = json.loads(graph_str)
    for node_id in graph["arg_nodes"]:
        node = graph["nodes"][node_id]
        # If a node is not in the params, infer it to be an input node
        name = node["name"]
        if name not in param_names:
            shape_dict[name] = graph["attrs"]["shape"][1][node_id]
            dtype_dict[name] = graph["attrs"]["dltype"][1][node_id]

    logger.debug("collecting graph input shape and type:")
    logger.debug("graph input shape: %s", shape_dict)
    logger.debug("graph input type: %s", dtype_dict)

    return shape_dict, dtype_dict

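# A small helper sketch built on get_input_info() above: it creates random input
# tensors matching the reported shapes and dtypes. The helper name and the use of
# numpy here are illustrative additions, not part of the original module.
import numpy as np

def make_random_inputs(graph_str, params):
    """Create random input data for a compiled module's non-weight inputs."""
    shape_dict, dtype_dict = get_input_info(graph_str, params)
    return {
        name: np.random.uniform(size=shape).astype(dtype_dict[name])
        for name, shape in shape_dict.items()
    }
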
def test_extern_dnnl():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = 'float32'
    ishape = (1, 32, 14, 14)
    w1shape = (32, 1, 3, 3)
    data0 = relay.var('data0', shape=(ishape), dtype=dtype)
    weight0 = relay.var('weight0', shape=(w1shape), dtype=dtype)

    data1 = relay.var('data0', shape=(ishape), dtype=dtype)
    weight1 = relay.var('weight0', shape=(w1shape), dtype=dtype)
    weight2 = relay.var('weight1', shape=(w1shape), dtype=dtype)
    depthwise_conv2d_1 = relay.nn.conv2d(data1,
                                         weight1,
                                         kernel_size=(3, 3),
                                         padding=(1, 1),
                                         groups=32)
    depthwise_conv2d_2 = relay.nn.conv2d(depthwise_conv2d_1,
                                         weight2,
                                         kernel_size=(3, 3),
                                         padding=(1, 1),
                                         groups=32)
    out = relay.add(depthwise_conv2d_1, depthwise_conv2d_2)

    f = relay.Function([data1, weight1, weight2], out)
    ref_mod = tvm.IRModule()
    ref_mod['main'] = f

    f = set_external_func_attr(f, "dnnl", "dnnl_0")
    call = relay.Call(f, [data0, weight0, weight0])
    mod = tvm.IRModule.from_expr(call)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w_data = np.random.uniform(0, 1, w1shape).astype(dtype)

    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu())
    ref_res = ref_ex.evaluate()(i_data, w_data, w_data)
    check_result(mod, {"data0": i_data, "weight0": w_data},
                 (1, 32, 14, 14), ref_res.asnumpy(), tol=1e-5)

def verify(target="cuda"): if not tvm.get_global_func("tvm.contrib.cublaslt.matmul", True): print("skip because extern function is not available") return dev = tvm.gpu(0) f = tvm.build(s, [A, B, C], target) a_old = np.random.uniform(0, 128, size=(n, l)) b_old = np.random.uniform(0, 128, size=(l, m)) # Transform a to become CUBLASLT_ORDER_COL4_4R2_8C layout a_new = np.hstack((a_old.astype(A.dtype), np.zeros([n, L - l]))) a_new = np.vstack((a_new.astype(A.dtype), np.zeros([N - n, L]))) a_even = np.vsplit(a_new[::2], N / 8) a_odd = np.vsplit(a_new[1::2], N / 8) a_new = [None] * (len(a_even) + len(a_odd)) a_new[::2] = a_even a_new[1::2] = a_odd a_new = np.vstack(a_new) a_new = np.vstack( np.vstack( np.vstack(np.hsplit(i, 8)).reshape([4, 32]) for i in np.vsplit(j, N / 4)) for j in np.hsplit(a_new, L / 32)) a_new = a_new.reshape([N, L]) # Transform b to become CUBLASLT_ORDER_COL32 layout b_new = np.vstack( np.hsplit( np.hstack((b_old.T.astype(B.dtype), np.zeros([m, L - l]))), L / 32)) b_new = b_new.reshape([m, L]) a = tvm.nd.array(a_new.astype(A.dtype), dev) b = tvm.nd.array(b_new.astype(B.dtype), dev) c = tvm.nd.array(np.zeros((m, N_out), dtype=C.dtype), dev) f(a, b, c) # Transform output c from layout CUBLASLT_ORDER_COL32 to row major layout c_out = c.asnumpy() c_out = c_out.reshape([int(m * N_out / 32), 32]) c_out = np.hstack(np.vsplit(c_out, int(N_out / 32))) c_out = c_out[:, :n] c_out = c_out.T tvm.testing.assert_allclose(c_out, np.dot(a_old.astype(C.dtype), b_old.astype(C.dtype)), rtol=rtol)
def test_add():
    """Test a subgraph with a single add operator."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    shape = (10, 10)

    def gen_add():
        data0 = relay.var("data0", shape=shape, dtype=dtype)
        data1 = relay.var("data1", shape=shape, dtype=dtype)
        out = relay.add(data0, data1)

        func = relay.Function([data0, data1], out)
        func = set_func_attr(func, "dnnl", "dnnl_0")
        glb_var = relay.GlobalVar("dnnl_0")
        mod = tvm.IRModule()
        mod[glb_var] = func

        data0 = relay.var("data0", shape=shape, dtype=dtype)
        data1 = relay.var("data1", shape=shape, dtype=dtype)
        main_f = relay.Function([data0, data1], glb_var(data0, data1))
        mod["main"] = main_f

        data0 = relay.var("data0", shape=shape, dtype=dtype)
        data1 = relay.var("data1", shape=shape, dtype=dtype)
        out = relay.add(data0, data1)
        main_f = relay.Function([data0, data1], out)
        ref_mod = tvm.IRModule()
        ref_mod["main"] = main_f

        return mod, ref_mod

    mod, ref_mod = gen_add()

    data0 = np.random.uniform(0, 1, shape).astype(dtype)
    data1 = np.random.uniform(0, 1, shape).astype(dtype)
    check_result(mod, ref_mod, {"data0": data0, "data1": data1}, shape, tol=1e-5)

def test_get_callback_with_node():
    x = tvm.convert(10)

    def test(y):
        assert y.handle != x.handle
        return y

    f2 = tvm.convert(test)

    # register into global function table
    @tvm.register_func
    def my_callback_with_node(y, f):
        assert y == x
        return f(y)

    # get it out from global function table
    f = tvm.get_global_func("my_callback_with_node")
    assert isinstance(f, tvm.Function)
    y = f(x, f2)
    assert(y.value == 10)

def test_rpc(dtype):
    if not tvm.get_global_func("tvm.contrib.random.random_fill", True):
        print("skip because extern function is not available")
        return
    if not tvm.testing.device_enabled("rpc") or not tvm.runtime.enabled("llvm"):
        return
    np_ones = np.ones((512, 512), dtype=dtype)
    server = rpc.Server("localhost")
    remote = rpc.connect(server.host, server.port)
    value = tvm.nd.empty(np_ones.shape, np_ones.dtype, remote.cpu())
    random_fill = remote.get_function("tvm.contrib.random.random_fill")
    random_fill(value)

    assert np.count_nonzero(value.asnumpy()) == 512 * 512

    # make sure arithmetic doesn't overflow too
    np_values = value.asnumpy()
    assert np.isfinite(np_values * np_values + np_values).any()

def test_create_array_buffer_info(): target = Target("c") global_ws_pool = WorkspacePoolInfo( "global_workspace", [target], ) fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo") tir_mod = LinearStructure tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target) tir_mod = _assign_poolinfos_to_allocates_in_irmodule( tir_mod, [global_ws_pool]) main_func = tir_mod["tvmgen_default_run_model"] buffer_info_analysis = tvm.tir.usmp.analysis.extract_buffer_info( main_func, tir_mod) buffer_info_array = fcreate_array_bi( buffer_info_analysis.buffer_info_stmts) for buffer_info in buffer_info_array: assert buffer_info in buffer_info_analysis.buffer_info_stmts.keys()
def verify(target="llvm"): if not tvm.get_global_func( "tvm.contrib.nnpack.fully_connected_inference", True): pytest.skip("extern function is not available") if not nnpack.is_available(): pytest.skip("nnpack is not available") ctx = tvm.cpu(0) f = tvm.build(s, [A, B, D, bias], target) a = tvm.nd.array(np.random.uniform(size=(l)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(m, l)).astype(B.dtype), ctx) d = tvm.nd.array(np.zeros((m, ), dtype=D.dtype), ctx) bb = 10.0 f(a, b, d, bb) tvm.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy().T) + bb, rtol=1e-5)
def test_run():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    ref_mod = annotated(dtype, ishape, w1shape)
    mod = annotated(dtype, ishape, w1shape)
    mod = transform.PartitionGraph()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, w1shape).astype(dtype)

    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu())
    ref_res = ref_ex.evaluate()(i_data, w1_data)

    check_result(
        mod, {"data": i_data, "weight1": w1_data}, (1, 32, 14, 14), ref_res.asnumpy(), tol=1e-5
    )

def test_invalid_attr_option(attr_name: str, target_attr: Union[str, int, bool, float, None]):
    if target_attr is None:
        # None cannot be caught as TVMError, as it causes a SIGKILL, so it must be rejected
        # before reaching relay.backend.contrib.uma.RegisterTarget at the Python level.
        with pytest.raises(ValueError):
            uma_backend = VanillaAcceleratorBackend()
            uma_backend._target_attrs = {attr_name: target_attr}
            uma_backend.register()
    else:
        registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget")
        target_name = f"{attr_name}_{target_attr}"
        target_attr = {attr_name: target_attr}
        with pytest.raises(
            tvm.TVMError, match=r"Only String, Integer, or Bool are supported. .*"
        ):
            registration_func(target_name, target_attr)

def test_get_callback_with_node():
    x = tvm.runtime.convert(10)

    def test(y):
        assert y.handle != x.handle
        return y

    f2 = tvm.runtime.convert(test)

    # register into global function table
    @tvm.register_func
    def my_callback_with_node(y, f):
        assert y == x
        return f(y)

    # get it out from global function table
    f = tvm.get_global_func("my_callback_with_node")
    assert isinstance(f, tvm.runtime.PackedFunc)
    y = f(x, f2)
    assert(y.value == 10)

def test_extern_dnnl_mobilenet():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 3, 224, 224)
    ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")
    mod = transform.AnnotateTarget(["dnnl"])(ref_mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)
    i_data = np.random.uniform(0, 1, ishape).astype(dtype)

    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0))
    ref_res = ref_ex.evaluate()(i_data, **params)
    compile_engine.get().clear()

    check_result(mod, {"data": i_data}, (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params)

def compile(func, mod, ctx, tgt, name='default', record_time=False):
    """Compile a relay function into a native library function.

    Parameters
    ----------
    func: Expr
        The function.

    mod: Module
        The Module.

    ctx: Context
        The Context.

    tgt: Target
        The target

    name: String
        The name of the target binary library.

    record_time: Bool
        Time cost to call f?

    Returns
    -------
    result: Function
        A function that, when pass in some values, will convert them to the
        right format and call the compiled func.
    """
    global _LIB
    if isinstance(func, GlobalVar):
        func = mod[func]
    assert isinstance(func, Function)
    compiler = AoTCompiler(mod, tgt)
    func = compiler.optimize(func)
    func = compiler.visit(func)
    lib_name, packed_name = lib_and_func_name(name)
    constants, source_code = to_source.to_source(mod, func, compiler.gv_map, ctx, packed_name)
    lib_name = f"librelay_aot_{_LIB_COUNTER}.so"
    library_path = compile_cpp(source_code, lib_name, flags=["-O3"])
    _LIB.append(load_lib(library_path))
    fn = get_global_func(packed_name)
    return _mk_wrapper(fn, ctx, constants, record_time)

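# A hedged usage sketch for compile() above, assuming the AoT backend and its
# native C++ toolchain are available; the identity Relay function is purely
# illustrative and not part of the original module.
import numpy as np
import tvm
from tvm import relay

x = relay.var("x", shape=(2, 2), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x], x))
aot_func = compile(mod["main"], mod, tvm.cpu(0), tvm.target.Target("llvm"))
print(aot_func(np.ones((2, 2), dtype="float32")))
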
def verify(target="llvm"): if not tvm.module.enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True): print("skip because extern function is not available") return if not nnpack.is_available(): return ctx = tvm.cpu(0) f = tvm.build(s, [A, B, D, bias], target) a = tvm.nd.array(np.random.uniform(size=(l)).astype(A.dtype), ctx) b = tvm.nd.array(np.random.uniform(size=(m, l)).astype(B.dtype), ctx) d = tvm.nd.array(np.zeros((m, ), dtype=D.dtype), ctx) bb = 10.0 f(a, b, d, bb) tvm.testing.assert_allclose( d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy().T) + bb, rtol=1e-5)
def verify(target="llvm"): if not tvm.testing.device_enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func("tvm.contrib.mkl.matmul_u8s8s32", True): print("skip because extern function is not available") return dev = tvm.cpu(0) f = tvm.build(s, [A, B, D, bias], target) a = tvm.nd.array(np.random.randint(low=0, high=50, size=ashape).astype(A.dtype), dev) b = tvm.nd.array(np.random.randint(low=0, high=50, size=bshape).astype(B.dtype), dev) d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), dev) bb = 10 f(a, b, d, bb) tvm.testing.assert_allclose( d.numpy(), get_numpy(a.numpy().astype("int32"), b.numpy().astype("int32"), bb, transa, transb), rtol=1e-5, )
def run_intervals(intervals, tolerance=0):
    """Helper to run intervals"""
    expected_mem = find_maximum_from_intervals(intervals)
    pools = [WorkspacePoolInfo("default", [])]
    buffers = []
    # populate
    for i, (start, stop, size) in enumerate(intervals):
        buf = BufferInfo(str(i), size, pools)
        # buf.set_pool_candidates( ["default"] )
        buffers.append(buf)

    # intersect
    for i, (i_start, i_stop, _) in enumerate(intervals):
        conflicts = set()
        for j, (j_start, j_stop, _) in enumerate(intervals):
            start = min(i_start, j_start)
            stop = max(i_stop, j_stop)
            i_dur = i_stop - i_start + 1
            j_dur = j_stop - j_start + 1

            if i != j and (stop - start + 1 < i_dur + j_dur):
                conflicts.add(buffers[j])

        buffers[i].set_conflicts([c for c in sorted(conflicts, key=lambda c: c.name_hint)])

    result = {}
    for (alg, params) in [
        ("tir.usmp.algo.hill_climb", (expected_mem,)),
        ("tir.usmp.algo.greedy_by_size", (expected_mem,)),
    ]:
        fusmp_algo = tvm.get_global_func(alg)
        print("\n", "started", alg)
        buffer_info_arr = fusmp_algo(buffers, *params)
        print()

        _verify_all_conflicts(buffer_info_arr)
        result[alg], msg = _check_max_workspace_size(
            buffer_info_arr, pools[0], expected_mem, tolerance
        )
        if not result[alg]:
            print(alg, msg)

    return result

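# A hedged example input for run_intervals() above: each tuple is read as
# (start, stop, size_in_bytes) of a buffer's live range, so the first two
# buffers overlap and must not share memory, while the third can reuse it.
# The concrete values are illustrative only.
example_intervals = [
    (0, 2, 256),
    (1, 3, 128),
    (4, 5, 64),
]
results = run_intervals(example_intervals)
assert all(results.values()), results
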
def save_tensors(params): """Save parameter dictionary to binary bytes. The result binary bytes can be loaded by the GraphModule with API "load_params". Parameters ---------- params : dict of str to NDArray The parameter dictionary. Returns ------- param_bytes: bytearray Serialized parameters. """ _save_tensors = tvm.get_global_func("tvm.relay._save_param_dict") return _save_tensors(params)
def verify_quantized_matmul_add(m, l, n, transa=False, transb=False):
    if not tvm.get_global_func("tvm.contrib.mkl.matmul_u8s8s32", True):
        pytest.skip("Quantized dense is supported only for MKL. TVM GPU CI uses openblas")
    data_dtype = "uint8"
    kernel_dtype = "int8"
    out_dtype = "int32"
    bias = te.var("bias", dtype=out_dtype)
    ashape = (l, n) if transa else (n, l)
    bshape = (m, l) if transb else (l, m)
    A = te.placeholder(ashape, name="A", dtype=data_dtype)
    B = te.placeholder(bshape, name="B", dtype=kernel_dtype)
    C = mkl.matmul_u8s8s32(A, B, transa, transb, dtype=out_dtype)
    D = te.compute(C.shape, lambda i, j: C[i, j] + bias, name="D")
    s = te.create_schedule(D.op)

    def get_numpy(a, b, bb, transa, transb):
        if transa:
            a = a.transpose()
        if transb:
            b = b.transpose()
        return np.dot(a, b) + bb

    def verify(target="llvm"):
        if not tvm.testing.device_enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.mkl.matmul_u8s8s32", True):
            print("skip because extern function is not available")
            return
        dev = tvm.cpu(0)
        f = tvm.build(s, [A, B, D, bias], target)
        a = tvm.nd.array(np.random.randint(low=0, high=50, size=ashape).astype(A.dtype), dev)
        b = tvm.nd.array(np.random.randint(low=0, high=50, size=bshape).astype(B.dtype), dev)
        d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), dev)
        bb = 10
        f(a, b, d, bb)
        tvm.testing.assert_allclose(
            d.numpy(),
            get_numpy(a.numpy().astype("int32"), b.numpy().astype("int32"), bb, transa, transb),
            rtol=1e-5,
        )

    verify()

def test_mobilenet_dnnl():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")

    mod = transform.AnnotateTarget(["dnnl"])(mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    data = get_calibration_data(mod, {"data": i_data, **params})

    # Check the number and orders
    check_data_size(mod, data)

def requires_cublas(*args):
    """Mark a test as requiring the cuBLAS library.

    This also marks the test as requiring a cuda gpu.

    Parameters
    ----------
    f : function
        Function to mark
    """
    requirements = [
        pytest.mark.skipif(
            tvm.get_global_func("tvm.contrib.cublas.matmul", True) is None,
            reason="cuBLAS library not enabled",
        ),
        *requires_cuda(),
    ]
    return _compose(args, requirements)

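# A hedged usage sketch: requires_cublas composes pytest markers, so it is
# applied as a decorator like the other tvm.testing requirements; the test
# body below is only a placeholder.
@tvm.testing.requires_cublas
def test_needs_cublas():
    # build and run a schedule that calls into tvm.contrib.cublas here
    ...
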
def test_uma_target(target_name, target_attrs, target_args):
    registration_func = tvm.get_global_func("relay.backend.contrib.uma.RegisterTarget")
    registration_func(target_name, target_attrs)

    # Test Defaults
    my_target = tvm.target.Target(target_name)

    assert str(my_target.kind) == target_name

    for attr in target_attrs.keys():
        assert my_target.attrs[attr] == target_attrs[attr]

    # Test with parameters overwritten
    args = " ".join((f"--{k}={v}" for k, v in target_args.items()))
    my_target = tvm.target.Target(f"{target_name} {args}")

    for attr in target_args.keys():
        assert my_target.attrs[attr] == target_args[attr]

def test_custom_algo():
    target = Target("c")
    global_workspace_pool = WorkspacePoolInfo(
        "global_workspace",
        [target],
    )
    tir_mod = ResnetStructure
    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
    tir_mod = tir_mod.with_attr("executor", tvm.relay.backend.Executor("aot"))
    tir_mod = tir_mod.with_attr("runtime", tvm.relay.backend.Runtime("crt"))
    tir_mod["__tvm_main__"] = tir_mod[
        "tvmgen_default_fused_cast_subtract_fixed_point_multiply_add_clip_cast_cast"
    ]

    algo_called = False

    @tvm.register_func("tir.usmp.algo.trivial")
    def _trivial_algo(buf_infos, mem_pressure):
        nonlocal algo_called
        algo_called = True
        out_layout = {}
        offset = 0
        for buf_info in buf_infos:
            pool_info = buf_info.pool_candidates[0]
            out_layout[buf_info] = usmp_utils.PoolAllocation(pool_info, offset)
            offset += buf_info.size_bytes
        return out_layout

    usmp_pass = tvm.get_global_func("tir.transform.UnifiedStaticMemoryPlanner")
    usmp_pass()(tir_mod)
    assert not algo_called

    with tvm.transform.PassContext(config={"tir.usmp.custom_algorithm": "trivial"}):
        usmp_pass()(tir_mod)

    assert algo_called

    with pytest.raises(
        tvm.TVMError, match="The selected custom USMP algorithm : invalid is not defined"
    ):
        with tvm.transform.PassContext(config={"tir.usmp.custom_algorithm": "invalid"}):
            usmp_pass()(tir_mod)

def test_dense():
    """Test a subgraph with a single dense operator."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    a_shape = (1, 512)
    b_shape = (1024, 512)

    def gen_dense():
        a = relay.var("A", shape=a_shape, dtype=dtype)
        b = relay.var("B", shape=b_shape, dtype=dtype)
        out = relay.nn.dense(a, b)

        func = relay.Function([a, b], out)
        func = set_func_attr(func, "dnnl", "dnnl_0")
        glb_var = relay.GlobalVar("dnnl_0")
        mod = tvm.IRModule()
        mod[glb_var] = func
        mod = transform.InferType()(mod)

        a = relay.var("A", shape=a_shape, dtype=dtype)
        b = relay.var("B", shape=b_shape, dtype=dtype)
        main_f = relay.Function([a, b], glb_var(a, b))
        mod["main"] = main_f
        mod = transform.InferType()(mod)

        a = relay.var("A", shape=a_shape, dtype=dtype)
        b = relay.var("B", shape=b_shape, dtype=dtype)
        out = relay.nn.dense(a, b)
        main_f = relay.Function([a, b], out)
        ref_mod = tvm.IRModule()
        ref_mod["main"] = main_f
        ref_mod = transform.InferType()(ref_mod)

        return mod, ref_mod

    mod, ref_mod = gen_dense()

    data_a = np.random.uniform(0, 1, a_shape).astype(dtype)
    data_b = np.random.uniform(0, 1, b_shape).astype(dtype)
    check_result(mod, ref_mod, {"A": data_a, "B": data_b}, (1, 1024), tol=1e-5)

def verify(target="llvm"): if not tvm.testing.device_enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func(lib.__name__ + ".matmul", True): print("skip because extern function is not available") return dev = tvm.cpu(0) name = "test_batch_matmul" f = tvm.build(s, [A, B, D], target, name=name) if target == "c": f = compile(f, name) a = tvm.nd.array(np.random.uniform(size=ashape).astype(A.dtype), dev) b = tvm.nd.array(np.random.uniform(size=bshape).astype(B.dtype), dev) d = tvm.nd.array(np.zeros((batch, n, m), dtype=D.dtype), dev) f(a, b, d) tvm.testing.assert_allclose( d.numpy(), get_numpy(a.numpy(), b.numpy(), transa, transb), rtol=1e-5 )
def verify(target="llvm"): if not tvm.module.enabled(target): print("skip because %s is not enabled..." % target) return if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True): print("skip because extern function is not avalable") return ctx = tvm.cpu(0) f = tvm.build(s, [data, kernel, bias, output], target) na = np.random.uniform(size=dshape).astype(data.dtype) nb = np.random.uniform(size=kshape).astype(kernel.dtype) nc = np.zeros(bshape, dtype=bias.dtype) ta = tvm.nd.array(na, ctx) tb = tvm.nd.array(nb, ctx) tc = tvm.nd.array(nc, ctx) td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), ctx) f(ta, tb, tc, td) nd = np_conv(na, nb, PAD) np.testing.assert_allclose( td.asnumpy(), nd, rtol=1e-5)
def test_env_func():
    @tvm.register_func("test.env_func")
    def test(x):
        return x + 1

    f = tvm.get_global_func("test.env_func")
    x = tvm.get_env_func("test.env_func")
    assert x.name == "test.env_func"
    json_str = tvm.save_json([x])
    y = tvm.load_json(json_str)[0]
    assert y.name == x.name
    assert y(1) == 2
    assert y.func(1) == 2

    x = tvm.make.node("attrs.TestAttrs", name="xx", padding=(3, 4), func=y)
    assert x.name == "xx"
    assert x.padding[0].value == 3
    assert x.padding[1].value == 4
    assert x.axis == 10
    x = tvm.load_json(tvm.save_json(x))
    assert isinstance(x.func, tvm.container.EnvFunc)
    assert x.func(10) == 11

def save_tensors(params): """Save parameter dictionary to binary bytes. The result binary bytes can be loaded by the GraphModule with API "load_params". Parameters ---------- params : dict of str to NDArray The parameter dictionary. Returns ------- param_bytes: bytearray Serialized parameters. """ _save_tensors = tvm.get_global_func("_save_param_dict") args = [] for k, v in params.items(): args.append(k) args.append(tvm.nd.array(v)) return _save_tensors(*args)
# pylint: disable=invalid-name
"""Attr dictionary object used by schedule functions"""
import tvm

_dict_get = tvm.get_global_func("nnvm.compiler._dict_get")
_dict_size = tvm.get_global_func("nnvm.compiler._dict_size")
_dict_keys = tvm.get_global_func("nnvm.compiler._dict_keys")


class AttrDict(object):
    """Attribute dictionary in nnvm.

    Used by python registration of compute and schedule function.
    AttrDict is passed as the first argument to schedule and compute function.
    """
    _tvm_tcode = 18

    def __init__(self, handle):
        self.handle = handle

    def __del__(self):
        tvm.nd.free_extension_handle(self.handle, 18)

    @property
    def _tvm_handle(self):
        return self.handle.value

    def __getitem__(self, key):
        return _dict_get(self, key)

    def keys(self):
        """Get list of keys in the dict.

def program_fpga(file_name):
    path = tvm.get_global_func("tvm.rpc.server.workpath")(file_name)
    bitstream = Bitstream(path)
    bitstream.download()
    logging.info("Program FPGA with %s", file_name)

              simple_mode=True):
    """Do lower while keeping all axes in IR
    i.e. Do not eliminate loop with extent of 1, do not vectorize, unroll or inject virtual threads
    """
    binds, _ = build_module.get_binds(args, binds)
    sch = sch.normalize()
    # Phase 0
    bounds = schedule.InferBound(sch)
    stmt = schedule.ScheduleOps(sch, bounds, True)
    stmt = ir_pass.StorageFlatten(stmt, binds, 64)
    stmt = ir_pass.CanonicalSimplify(stmt)
    assert simple_mode
    return stmt


try:
    _get_buffer_curve_sample_flatten = get_global_func(
        "autotvm.feature.GetCurveSampleFeatureFlatten")
    _get_itervar_feature = get_global_func("autotvm.feature.GetItervarFeature")
    _get_itervar_feature_flatten = get_global_func("autotvm.feature.GetItervarFeatureFlatten")
except ValueError as e:
    def raise_error(*args, **kwargs):  # pylint: disable=unused-argument
        raise RuntimeError("Cannot load autotvm c++ API")
    _get_buffer_curve_sample_flatten = _get_itervar_feature = _get_itervar_feature_flatten = \
        raise_error


def get_itervar_feature(sch, args, take_log=False):
    """get features of iter vars

    Parameters
    ----------
    sch: tvm.schedule.Schedule
    args: Array of tvm.tensor.Tensor

    ----------
    g : Graph
        The input graph

    layout : dict of str to str or str
        The input layout

    Returns
    -------
    g : Graph
        The updated graph with updated layout.
    """
    if isinstance(layout, dict):
        list_layout = [
            layout.get(name, "__undef__") for name in g.index.input_names]
    elif isinstance(layout, str):
        list_layout = ["__undef__"] * len(g.index.input_names)
        list_layout[0] = layout
    else:
        raise ValueError("Input layout must be str or dict")
    last_inferred_layouts = g.json_attr("layout")
    if last_inferred_layouts:
        input_layout = [last_inferred_layouts[g.index.entry_id(x)] for x in g.index.input_names]
        for i, layout_stored in enumerate(input_layout):
            list_layout[i] = list_layout[i] if list_layout[i] != '__undef__' else layout_stored
    g._set_json_attr("layout_inputs", list_layout, 'list_layout')
    return g


_move_out_module = tvm.get_global_func("nnvm.graph._move_module")
_move_out_graph = tvm.get_global_func("nnvm.graph._move_graph")

def ext_dev_callback():
    load_vta_dll()
    return tvm.get_global_func("device_api.ext_dev")()

def clear_stats():
    """Clear profiler statistics"""
    f = tvm.get_global_func("vta.simulator.profiler_clear", True)
    if f:
        f()

top.tag : Contains explanation of the tag type.
"""
# Elementwise operator
ELEMWISE = 0
# Broadcast operator
BROADCAST = 1
# Injective mapping
INJECTIVE = 2
# Communication
COMM_REDUCE = 3
# Complex op, can still fuse ewise into it
OUT_ELEMWISE_FUSABLE = 4
# Not fusable opaque op
OPAQUE = 8

_register_compute = tvm.get_global_func("nnvm._register_compute")
_register_schedule = tvm.get_global_func("nnvm._register_schedule")
_register_pattern = tvm.get_global_func("nnvm._register_pattern")
_register_alter_op_layout = tvm.get_global_func("nnvm.compiler._register_alter_op_layout")


def register_compute(op_name, f=None, level=10):
    """Register compute function for operator

    Parameters
    ----------
    op_name : str
        The name of operator

    f : function
        The compute function

# "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # pylint: disable=invalid-name """Helper utility to save parameter dict""" import tvm _save_param_dict = tvm.get_global_func("nnvm.compiler._save_param_dict") _load_param_dict = tvm.get_global_func("nnvm.compiler._load_param_dict") def save_param_dict(params): """Save parameter dictionary to binary bytes. The result binary bytes can be loaded by the GraphModule with API "load_params". Parameters ---------- params : dict of str to NDArray The parameter dictionary. Returns -------
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name
"""Compiler engine interface to internal engine

You can get the engine singleton at ``nnvm.compiler.engine``
"""
import tvm

_list_cache_items = tvm.get_global_func("nnvm.compiler.ListCacheItems")
_clear_cache = tvm.get_global_func("nnvm.compiler.ClearCache")
_get_cache_item = tvm.get_global_func("nnvm.compiler.GetCacheItem")
_set_cache_item = tvm.get_global_func("nnvm.compiler.SetCacheItem")
_graph_key_get_graph = tvm.get_global_func("nnvm.compiler.GraphKeyGetGraph")
_make_graph_key = tvm.get_global_func("nnvm.compiler.MakeGraphKey")


@tvm.register_node
class GraphKey(tvm.node.NodeBase):
    """Key of a graph compilation context"""

    @property
    def graph(self):
        return _graph_key_get_graph(self)


@tvm.register_node

    out_dtype: list of tuple
        Dtype of outputs
    """
    graph = graph_attr.set_dtype_inputs(graph, dtype)
    graph = graph.apply("InferType")
    dtype = graph.json_attr("dtype")
    index = graph.index
    input_dtype = [graph_attr.TCODE_TO_DTYPE[dtype[index.entry_id(x)]]
                   for x in index.input_names]
    output_dtype = [graph_attr.TCODE_TO_DTYPE[dtype[index.entry_id(x)]]
                    for x in index.output_entries]
    return input_dtype, output_dtype


_deep_compare = tvm.get_global_func("nnvm.graph.DeepCompare")


def check_graph_equal(grapha, graphb, compare_variable_attrs=False):
    """Check if two graphs have equal structure.

    Parameters
    ----------
    grapha : Graph
        The first graph

    graphb : Graph
        The second graph

    compare_variable_attrs : bool, optional
        Whether we want to compare attributes(names) on variables.
        Usually it is safe to skip it unless we want input name

def enabled():
    """Check if simulator is enabled."""
    f = tvm.get_global_func("vta.simulator.profiler_clear", True)
    return f is not None

# "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # pylint: disable=invalid-name """Helper utility to save parameter dicts.""" import tvm _save_param_dict = tvm.get_global_func("tvm.relay._save_param_dict") _load_param_dict = tvm.get_global_func("tvm.relay._load_param_dict") def save_param_dict(params): """Save parameter dictionary to binary bytes. The result binary bytes can be loaded by the GraphModule with API "load_params". Parameters ---------- params : dict of str to NDArray The parameter dictionary. Returns -------