def test_compile_injective_with_tuple():
    x = relay.var("x", shape=(2, 3))
    y = relay.var("y", shape=(2, 3))
    x_transpose = relay.transpose(x)
    output = relay.Tuple([x_transpose, y])
    func = relay.Function([x, y], output)
    relay.build(func, 'llvm')
def test_tuple_intermediate():
    def before(x):
        inj = relay.squeeze(x)
        y1 = relay.add(inj, relay.const(1, "float32"))
        tmp = relay.squeeze(inj)
        tmp = relay.add(tmp, relay.const(1, "float32"))
        y2 = relay.add(tmp, relay.const(1, "float32"))
        y3 = relay.add(inj, relay.const(1, "float32"))
        concat = relay.concatenate((y1, y2, y3), axis=1)
        out_inj = relay.squeeze(concat)
        out = relay.add(out_inj, relay.const(1, "float32"))
        return relay.Function(relay.ir_pass.free_vars(out), out)

    def expected(p0):
        f0 = before(p0)
        x = relay.var("x", shape=dshape)
        y = relay.Call(f0, [x])
        return relay.Function([x], y)

    dshape = (1, 16, 64, 64)
    x = relay.var("x", shape=dshape)
    z = before(x)
    z = relay.ir_pass.infer_type(z)
    zz = relay.ir_pass.fuse_ops(z, opt_level=0)
    assert not relay.ir_pass.free_vars(zz)
    zz = relay.ir_pass.fuse_ops(z, opt_level=2)
    relay.build(zz, 'llvm')
    zz = relay.ir_pass.infer_type(zz)
    assert not relay.ir_pass.free_vars(zz)
    after = relay.ir_pass.infer_type(expected(x))
    assert relay.ir_pass.alpha_equal(zz, after)
def test_tuple_consecutive():
    def gen_intermediate_tuple(x):
        y1 = relay.add(x, relay.const(1, "float32"))
        y2 = relay.add(x, relay.const(1, "float32"))
        y3 = relay.add(x, relay.const(1, "float32"))
        concat = relay.concatenate((y1, y2, y3), axis=1)
        out = relay.add(concat, relay.const(1, "float32"))
        return out

    def gen_consecutive_tuple(x):
        y1 = gen_intermediate_tuple(x)
        y2 = gen_intermediate_tuple(x)
        y3 = gen_intermediate_tuple(x)
        concat = relay.concatenate((y1, y2, y3), axis=1)
        return concat

    def before(x):
        concat = gen_consecutive_tuple(x)
        pooled = relay.nn.max_pool2d(concat, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
        out = relay.add(pooled, relay.const(1, "float32"))
        out2 = relay.add(out, relay.const(1, "float32"))
        out_tup = relay.Tuple((out, out2))
        return relay.Function(relay.ir_pass.free_vars(out_tup), out_tup)

    def expected(dshape):
        p0 = relay.var("p0", shape=dshape)
        concat = gen_consecutive_tuple(p0)
        f0 = relay.Function([p0], concat)

        p01 = relay.var("p01", shape=(1, dshape[1]*9, dshape[2], dshape[3]))
        pooled = relay.nn.max_pool2d(p01, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
        out = relay.add(pooled, relay.const(1, "float32"))
        f1 = relay.Function([p01], out)

        p02 = relay.var("p02", shape=(1, dshape[1]*9, dshape[2]//2, dshape[3]//2))
        out = relay.add(p02, relay.const(1, "float32"))
        f2 = relay.Function([p02], out)

        x = relay.var("x", shape=dshape)
        y = relay.Call(f0, [x])
        z = relay.Call(f1, [y])
        z2 = relay.Call(f2, [z])
        return relay.Function([x], relay.Tuple((z, z2)))

    dshape = (1, 16, 64, 64)
    x = relay.var("x", shape=dshape)
    z = before(x)
    z = relay.ir_pass.infer_type(z)
    zz = relay.ir_pass.fuse_ops(z, opt_level=0)
    assert not relay.ir_pass.free_vars(zz)
    zz = relay.ir_pass.fuse_ops(z, opt_level=2)
    relay.build(zz, 'llvm')
    zz = relay.ir_pass.infer_type(zz)
    assert not relay.ir_pass.free_vars(zz)
    after = relay.ir_pass.infer_type(expected(dshape))
    assert relay.ir_pass.alpha_equal(zz, after)
def test_gru_like():
    def unit(rnn_dim):
        X = relay.var("X", shape=(1, rnn_dim))
        W = relay.var("y", shape=(3 * rnn_dim, rnn_dim))
        matmul = relay.nn.dense(X, W)
        splitted = relay.split(matmul, indices_or_sections=3, axis=1)
        out = relay.sigmoid(splitted[0]) + relay.tanh(splitted[1]) * relay.exp(splitted[2])
        return relay.Function([X, W], out)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def unit_numpy(X, W):
        prod = np.dot(X, W.transpose())
        splits = np.split(prod, indices_or_sections=3, axis=1)
        return sigmoid(splits[0]) + np.tanh(splits[1]) * np.exp(splits[2])

    dtype = "float32"
    rnn_dim = 1000
    x = np.random.rand(1, rnn_dim).astype(dtype)
    y = np.random.rand(3 * rnn_dim, rnn_dim).astype(dtype) * 0.01 - 0.005
    out_shape = (1, rnn_dim)
    z = unit(rnn_dim)

    for target, ctx in ctx_list():
        with relay.build_config(opt_level=2):
            graph, lib, params = relay.build(z, target)
        m = graph_runtime.create(graph, lib, ctx)
        m.set_input("X", tvm.nd.array(x.astype(dtype)))
        m.set_input("y", tvm.nd.array(y.astype(dtype)))
        m.set_input(**params)
        m.run()
        out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
        ref = unit_numpy(x, y)
        tvm.testing.assert_allclose(out, ref, rtol=1e-5, atol=1e-5)
def test_compile_placeholder_bypass():
    engine = relay.backend.compile_engine.get()
    x = relay.var("x", shape=(2, 3))
    y = relay.var("y", shape=(2, 3))
    z = relay.var("z", shape=(2, 3))
    result = relay.Tuple([x, relay.op.concatenate([y, z], axis=0)])
    func = relay.Function(relay.ir_pass.free_vars(result), result)
    with relay.build_config(opt_level=0):
        graph, lib, params = relay.build(func, 'llvm')
def get_tvm_output(xs, target, ctx, dtype='float32'):
    shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)}
    func, params = relay.frontend.from_keras(keras_model, shape_dict)
    with relay.build_module.build_config(opt_level=2):
        graph, lib, params = relay.build(func, target, params=params)
    m = graph_runtime.create(graph, lib, ctx)
    for name, x in zip(keras_model.input_names, xs):
        m.set_input(name, tvm.nd.array(x.astype(dtype)))
    m.set_input(**params)
    m.run()
    return [m.get_output(i).asnumpy() for i in range(m.get_num_outputs())]
def get_tvm_output(func, x, params, target, ctx,
                   out_shape=(1, 1000), input_name='image', dtype='float32'):
    with relay.build_module.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
    m.set_input(**params)
    m.run()
    # get outputs
    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
    return out.asnumpy()
def test_with_params():
    x = relay.var('x', shape=(10, 5))
    y = relay.var('y', shape=(1, 5))
    z = relay.add(x, y)
    z = relay.exp(z)
    func = relay.Function([x, y], z)
    x_data = np.random.rand(10, 5).astype('float32')
    y_data = np.random.rand(1, 5).astype('float32')
    params = {"y": y_data}
    graph, lib, params = relay.build(func, "llvm", params=params)
    mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
    mod.set_input(**params)
    mod.set_input(x=x_data)
    mod.run()
    res = mod.get_output(0).asnumpy()
    ref_res = np.exp(y_data + x_data)
    tvm.testing.assert_allclose(res, ref_res)
def run_tvm_graph(tflite_model_buf, input_data, input_node, num_output=1,
                  target='llvm', out_names=None):
    """ Generic function to compile on relay and execute on tvm """
    try:
        import tflite.Model
    except ImportError:
        raise ImportError("The tflite package must be installed")

    # get TFLite model from buffer
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)

    input_data = convert_to_list(input_data)
    input_node = convert_to_list(input_node)

    shape_dict = {}
    dtype_dict = {}
    for i, e in enumerate(input_node):
        shape_dict[e] = input_data[i].shape
        dtype_dict[e] = input_data[i].dtype.name

    func, params = relay.frontend.from_tflite(tflite_model,
                                              shape_dict=shape_dict,
                                              dtype_dict=dtype_dict)
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)

    ctx = tvm.context(target, 0)
    from tvm.contrib import graph_runtime
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    for i, e in enumerate(input_node):
        m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    assert out_names is None or num_output == len(out_names), \
        "out_names: {} num_output: {}".format(out_names, num_output)
    tvm_output_list = []
    for i in range(0, num_output):
        tvm_output = m.get_output(i)
        tvm_output_list.append(tvm_output.asnumpy())
    return tvm_output_list
def get_tvm_output(symbol, x, args, auxs, target, ctx, dtype='float32'):
    shape_dict = {"data": x.shape}
    if gluon_impl:
        new_sym, params = relay.frontend.from_mxnet(symbol, shape_dict)
    else:
        new_sym, params = relay.frontend.from_mxnet(symbol, shape_dict,
                                                    arg_params=args,
                                                    aux_params=auxs)
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(new_sym, target, params=params)
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    m.set_input("data", tvm.nd.array(x.astype(dtype)))
    m.set_input(**params)
    m.run()
    # get outputs
    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
    return out.asnumpy()
def get_tvm_output(graph_def, input_data, target, ctx,
                   output_shape=None, output_dtype='float32'):
    """ Generic function to execute and get tvm output"""
    # Note: the target and ctx arguments are overridden below; this helper
    # always builds for llvm and runs on the local CPU.
    target = 'llvm'
    if isinstance(input_data, list):
        input_names = {}
        shape_dict = {}
        dtype_dict = {}
        for i, _ in enumerate(input_data):
            input_names[i] = graph_def.graph.input[i].name
            shape_dict[input_names[i]] = input_data[i].shape
            dtype_dict[input_names[i]] = input_data[i].dtype
    else:
        input_names = graph_def.graph.input[0].name
        shape_dict = {input_names: input_data.shape}
        dtype_dict = {input_names: input_data.dtype}

    sym, params = relay.frontend.from_onnx(graph_def, shape_dict)
    with relay.build_config(opt_level=1):
        graph, lib, params = relay.build(sym, target, params=params)

    ctx = tvm.cpu(0)
    from tvm.contrib import graph_runtime
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    if isinstance(input_data, list):
        for i, e in enumerate(input_names):
            m.set_input(input_names[i],
                        tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    else:
        m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))
    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    if isinstance(output_shape, list) and isinstance(output_dtype, list):
        tvm_output_list = []
        for i, _ in enumerate(output_shape):
            tvm_output = m.get_output(i)
            tvm_output_list.append(tvm_output.asnumpy())
        return tvm_output_list
    else:
        tvm_output = m.get_output(0)
        return tvm_output.asnumpy()
def get_tvm_output(net, data, params, target, ctx, dtype='float32'):
    with relay.build_config(opt_level=1):
        graph, lib, params = relay.build(net, target, params=params)
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    m.set_input("data", data)
    m.set_input(**params)
    m.run()
    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
    if measure:
        print("Evaluate graph runtime inference time cost...")
        ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20)
        # Measure in millisecond.
        prof_res = np.array(ftimer().results) * 1000
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
    return out.asnumpy()
def test_runtime(target, device, func, fallback_device=None, expected_index=None):
    params = {"x": x_data, "y": y_data}
    config = {"opt_level": 1}
    if fallback_device:
        config["fallback_device"] = fallback_device
    with relay.build_config(**config):
        graph, lib, params = relay.build(func, target, params=params)
    contexts = [tvm.cpu(0), tvm.context(device)]
    graph_json = json.loads(graph)
    if "device_index" in graph_json["attrs"]:
        device_index = graph_json["attrs"]["device_index"][1]
        assert device_index == expected_index
    mod = graph_runtime.create(graph, lib, contexts)
    mod.set_input(**params)
    mod.run()
    res = mod.get_output(0).asnumpy()
    tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
def verify_graph_runtime(remote, target, shape, dtype):
    x = relay.var('x')
    y = relay.const(1)
    z = relay.add(x, y)
    func = relay.Function([x], z)

    x_in = np.ones(shape).astype(dtype)
    params = {'x': x_in}
    graph, lib, params = relay.build(func, target=target, params=params)

    temp = util.tempdir()
    path_dso = temp.relpath("dev_lib.o")
    lib.save(path_dso)
    remote.upload(path_dso)
    lib = remote.load_module("dev_lib.o")
    ctx = remote.cpu(0)
    mod = graph_runtime.create(graph, lib, ctx)
    mod.load_params(relay.save_param_dict(params))
    mod.run()
    out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
    tvm.testing.assert_allclose(x_in + 1, out.asnumpy())
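# Usage sketch (an assumption, not part of the original test): when no real
# remote device is attached, an RPC LocalSession can stand in for `remote`,
# since it supports the same upload/load_module/cpu interface used above.
# `util.tempdir` and `graph_runtime` are assumed to be imported as in the
# enclosing test file.
from tvm import rpc

verify_graph_runtime(rpc.LocalSession(), "llvm", (10,), "float32")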
def run_tvm_graph(coreml_model, target, ctx, input_data, input_name,
                  output_shape, output_dtype='float32'):
    """ Generic function to compile on relay and execute on tvm """
    if isinstance(input_data, list):
        shape_dict = {}
        dtype_dict = {}
        for i, e in enumerate(input_name):
            shape_dict[e] = input_data[i].shape
            dtype_dict[e] = input_data[i].dtype
    else:
        shape_dict = {input_name: input_data.shape}
        dtype_dict = {input_name: input_data.dtype}

    func, params = relay.frontend.from_coreml(coreml_model, shape_dict)
    with relay.build_module.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)

    from tvm.contrib import graph_runtime
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    if isinstance(input_data, list):
        for i, e in enumerate(input_name):
            m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    else:
        m.set_input(input_name, tvm.nd.array(input_data.astype(input_data.dtype)))
    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    if isinstance(output_shape, list) and isinstance(output_dtype, list):
        tvm_output_list = []
        for i, s in enumerate(output_shape):
            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
            tvm_output_list.append(tvm_output.asnumpy())
        return tvm_output_list
    else:
        tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype))
        return tvm_output.asnumpy()
def test_tuple_consecutive():
    def gen_intermediate_tuple(x):
        y1 = relay.add(x, relay.const(1, "float32"))
        y2 = relay.add(x, relay.const(1, "float32"))
        y3 = relay.add(x, relay.const(1, "float32"))
        concat = relay.concatenate((y1, y2, y3), axis=1)
        out = relay.add(concat, relay.const(1, "float32"))
        return out

    def gen_consecutive_tuple(x):
        y1 = gen_intermediate_tuple(x)
        y2 = gen_intermediate_tuple(x)
        y3 = gen_intermediate_tuple(x)
        concat = relay.concatenate((y1, y2, y3), axis=1)
        return concat

    def before(x):
        concat = gen_consecutive_tuple(x)
        pooled = relay.nn.max_pool2d(concat, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
        out = relay.add(pooled, relay.const(1, "float32"))
        out2 = relay.add(out, relay.const(1, "float32"))
        out_tup = relay.Tuple((out, out2))
        return relay.Function(relay.analysis.free_vars(out_tup), out_tup)

    def expected(dshape):
        p0 = relay.var("p0", shape=dshape)
        concat = gen_consecutive_tuple(p0)
        f0 = relay.Function([p0], concat)
        f0 = f0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p01 = relay.var("p01", shape=(1, dshape[1] * 9, dshape[2], dshape[3]))
        pooled = relay.nn.max_pool2d(p01, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
        out = relay.add(pooled, relay.const(1, "float32"))
        f1 = relay.Function([p01], out)
        f1 = f1.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p02 = relay.var("p02", shape=(1, dshape[1] * 9, dshape[2] // 2, dshape[3] // 2))
        out = relay.add(p02, relay.const(1, "float32"))
        f2 = relay.Function([p02], out)
        f2 = f2.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        x = relay.var("x", shape=dshape)
        y = relay.Call(f0, [x])
        z = relay.Call(f1, [y])
        z2 = relay.Call(f2, [z])
        return relay.Function([x], relay.Tuple((z, z2)))

    dshape = (1, 16, 64, 64)
    x = relay.var("x", shape=dshape)
    orig = before(x)
    fuse0(tvm.IRModule.from_expr(orig))
    m = fuse2(tvm.IRModule.from_expr(orig))
    relay.build(m, "llvm")
    after = run_opt_pass(expected(dshape), transform.InferType())
    assert tvm.ir.structural_equal(m["main"], after)
def test_meta_schedule_te2primfunc_argument_order():
    @derived_object
    class TestDummyDatabase(PyDatabase):
        def __init__(self):
            super().__init__()
            self.records = []
            self.workload_reg = []

        def has_workload(self, mod: IRModule) -> bool:
            for workload in self.workload_reg:
                if tvm.ir.structural_equal(workload.mod, mod):
                    return True
            # The database has already put in all correct workloads
            raise ValueError(
                "The workload searched for is not in given database!"
                + " Incorrect TIR was generated from TE subgraph."
            )

        def commit_tuning_record(self, record: TuningRecord) -> None:
            self.records.append(record)

        def commit_workload(self, mod: IRModule) -> Workload:
            for workload in self.workload_reg:
                if tvm.ir.structural_equal(workload.mod, mod):
                    return workload
            workload = Workload(mod)
            self.workload_reg.append(workload)
            return workload

        def get_top_k(self, workload: Workload, top_k: int) -> List[TuningRecord]:
            return list(
                filter(
                    lambda x: x.workload == workload,
                    sorted(self.records, key=lambda x: sum(x.run_secs) / len(x.run_secs)),
                )
            )[: int(top_k)]

        def __len__(self) -> int:
            return len(self.records)

        def print_results(self) -> None:
            print("\n".join([str(r) for r in self.records]))

    data_shape = (1, 3, 16, 16)
    weight_shape = (8, 3, 5, 5)
    data = relay.var("data", relay.TensorType(data_shape, "float32"))
    weight = relay.var("weight", relay.TensorType(weight_shape, "float32"))
    y = relay.nn.conv2d(
        data,
        weight,
        padding=(2, 2),
        kernel_size=(5, 5),
        kernel_layout="OIHW",
        out_dtype="float32",
    )
    f = relay.Function([data, weight], y)
    mod = tvm.IRModule.from_expr(f)
    mod = relay.transform.InferType()(mod)

    data_sample = np.random.rand(*data_shape).astype("float32")
    weight_sample = np.random.rand(*weight_shape).astype("float32")
    params = {mod["main"].params[1].name_hint: weight_sample}

    input_name = "data"
    dev = tvm.cpu()
    target = Target("llvm --num-cores=16")
    data = tvm.nd.array(data_sample, dev)

    database = TestDummyDatabase()
    database.commit_workload(tvmgen_default_fused_layout_transform)
    database.commit_workload(tvmgen_default_fused_layout_transform_1)
    database.commit_workload(tvmgen_default_fused_nn_contrib_conv2d_NCHWc)

    with ApplyHistoryBest(database):
        with tvm.transform.PassContext(
            opt_level=3,
            config={"relay.backend.use_meta_schedule": True},
        ):
            rt_mod1 = relay.build(mod, target=target, params=params)

    # Compile without meta-scheduler for correctness check
    with tvm.transform.PassContext(opt_level=0):
        rt_mod2 = relay.build(mod, target=target, params=params)

    def get_output(data, lib):
        module = graph_executor.GraphModule(lib["default"](dev))
        module.set_input(input_name, data)
        module.run()
        return module.get_output(0).numpy()

    # Check correctness
    actual_output = get_output(data, rt_mod1)
    expected_output = get_output(data, rt_mod2)
    assert np.allclose(actual_output, expected_output, rtol=1e-4, atol=2e-4)
def compile(graph: Graph, batch_size, target, target_host):
    relay_module, params = graph2relay(graph, batch_size)
    with relay.build_config(opt_level=3):
        graph_json, tvm_module, params = relay.build(
            relay_module, target=target, target_host=target_host, params=params)
    return graph_json, tvm_module, params
# Output numerical difference < 10e-4 %.
#
# DGL version: https://github.com/dmlc/dgl/blob/master/examples/mxnet/gcn/gcn.py

from tvm.contrib import graph_runtime
import time

# Set up weights. You can modify this part and use your own trained weights.
params['in_weight'] = np.ones((input_dim, hidden_dim), dtype='float32')
params['out_weight'] = np.ones((hidden_dim, num_classes), dtype='float32')
for i in range(num_hidden):
    params["%s_weight" % (str(i))] = np.ones((hidden_dim, hidden_dim), dtype='float32')

# Generate graph and library
with relay.build_config(opt_level=0):  # Currently only support opt_level=0
    graph, lib, params = relay.build(func, target, params=params)
lib.save("lib.o")

# Generate module for llvm
ctx = tvm.context(target, 0)
m = graph_runtime.create(graph, lib, ctx)
m.set_input(**params)

print("finished compiling, testing inference time cost")
totaltime = 0
for i in range(30):
    st = time.time()
    # One forward pass on the entire network
    m.run()
    end = time.time()
    # Retrieve output Tensor as numpy array
def build_run_compare(tvm_mod, params1, input_shape, dtype="float32",
                      target="llvm", gpu_preprocess=None):
    if "TVM_TRACKER_HOST" in os.environ and "TVM_TRACKER_PORT" in os.environ:
        rpc_tracker_host = os.environ["TVM_TRACKER_HOST"]
        rpc_tracker_port = os.environ["TVM_TRACKER_PORT"]
        run_on_host = 0
        target_host = "llvm -mtriple=arm64-linux-android"
        rpc_tracker_port = int(rpc_tracker_port)
    else:
        run_on_host = 1
        target_host = "llvm"

    if gpu_preprocess:
        tvm_mod_nchwc = gpu_preprocess(tvm_mod)
    else:
        tvm_mod_nchwc = tvm_mod

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(
            tvm_mod_nchwc, target_host=target_host, target=target, params=params1)

    if run_on_host:
        ctx = tvm.opencl()
        m = graph_runtime.create(graph, lib, ctx)
    else:
        from tvm import rpc
        from tvm.contrib import utils, ndk
        rpc_key = "android"
        tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
        remote = tracker.request(rpc_key, priority=0, session_timeout=600)
        temp = utils.tempdir()
        dso_binary = "dev_lib_cl.so"
        dso_binary_path = temp.relpath(dso_binary)
        ctx = remote.cl(0)
        lib.export_library(dso_binary_path, ndk.create_shared)
        remote.upload(dso_binary_path)
        rlib = remote.load_module(dso_binary)
        m = graph_runtime.create(graph, rlib, ctx)

    m.set_input(**params)
    inputs = []
    if isinstance(input_shape, dict):
        for key in input_shape:
            inputs.append(np.random.normal(size=input_shape[key]).astype(dtype))
            m.set_input(key, inputs[-1])
    else:
        inputs.append(np.random.normal(size=input_shape).astype(dtype))
        m.set_input("data", inputs[-1])
    m.run()

    ref_outputs = get_cpu_reference(tvm_mod, params1, input_shape, inputs)
    for i, ref_output in enumerate(ref_outputs):
        tvm_output = m.get_output(i)
        output = tvm_output.asnumpy()
        # for index, x in np.ndenumerate(ref_output):
        #     if abs(output[index] - x) > 0.01:
        #         print(index, output[index], x)
        np.testing.assert_allclose(output, ref_output, rtol=1e-1, atol=1e-1)
])

with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)

tvm_target = get_tvm_target(device, get_device_type(), get_device_arch(),
                            get_device_attributes())
tvm_targets = tvm.target.Target(tvm_target)
cpu_target = "llvm"
target_host = cpu_target
cpudevice = tvm.runtime.cpu()

with tvm.transform.PassContext(opt_level=3):
    graph_mod = relay.build(mod, tvm_targets, params=params, target_host=target_host)

lib = graph_mod.get_lib()
params = graph_mod.get_params()

# Create a runtime executor module
module = graph_executor.GraphModule(graph_mod["default"](cpudevice))
# Feed input data
module.set_input(input_tensor, tvm.nd.array(image_data))
# Feed related params
module.set_input(**params)

ftimer = module.module.time_evaluator("run", cpudevice, number=1, repeat=10)
        env.BATCH,
        env.BLOCK_OUT,
        env.WGT_WIDTH,
        start_name=pack_dict[model][0],
        stop_name=pack_dict[model][1],
        device_annot=(env.TARGET == "intelfocl"),
    )
else:
    relay_prog = mod["main"]

# Compile Relay program with AlterOpLayout disabled
if target.device_name != "vta":
    with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
        graph, lib, params = relay.build(
            relay_prog, target=target, params=params, target_host=env.target_host)
else:
    if env.TARGET == "intelfocl":
        # multiple targets to run both on cpu and vta
        target = {"cpu": env.target_vta_cpu, "ext_dev": target}
    with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
        graph, lib, params = relay.build(
            relay_prog, target=target, params=params, target_host=env.target_host)

# Measure Relay build time
build_time = time.time() - build_start
print(model + " inference graph built in {0:.2f}s!".format(build_time))
def get_ref_rt_mod(mod, params, target="cuda"):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)
    dev = tvm.device(target, 0)
    rt_mod = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
    return rt_mod, dev
if local_demo:
    target_host = None
    target = 'llvm'
elif test_target == 'opencl':
    target_host = target
    target = 'opencl'
elif test_target == 'vulkan':
    target_host = target
    target = 'vulkan'

input_name = 'input_1'
shape_dict = {input_name: x.shape}
func, params = relay.frontend.from_keras(keras_mobilenet_v2, shape_dict)

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(func, target=target,
                                     target_host=target_host, params=params)

# After `relay.build`, you will get three return values: the graph, the
# library, and the new parameters, since we do some optimization that will
# change the parameters but keep the result of the model the same.

# Save the library at a local temporary directory.
tmp = util.tempdir()
lib_fname = tmp.relpath('net.so')
fcompile = ndk.create_shared if not local_demo else None
lib.export_library(lib_fname, fcompile)

######################################################################
# Deploy the Model Remotely by RPC
# ---------------------------------------------
# With RPC, you can deploy the model remotely from your host machine
def manual_tir_common(do_tune=False):
    M, N, K = 1024, 1024, 1024  # pylint: disable=invalid-name
    data_shape = (M, K)
    weight_shape = (N, K)

    data_dtype = "uint8"
    data = relay.var("data", shape=data_shape, dtype=data_dtype)
    weight = relay.var("weight", shape=weight_shape, dtype="int8")
    bias = relay.var("bias", shape=(weight_shape[0],), dtype="int32")

    # dense is tuned by the TIR schedule above, bmm is scheduled by TE (topi/x86/batch_matmul.py)
    dense = relay.nn.dense(data, weight, out_dtype="int32")
    bias_add = relay.nn.bias_add(dense, bias) + relay.const(1, dtype="int32")
    out = relay.nn.batch_matmul(
        relay.cast(relay.expand_dims(bias_add, 0), "uint8"),
        relay.cast(relay.expand_dims(bias_add, 0), "int8"),
        out_dtype="int32",
    )

    relay_mod = tvm.IRModule.from_expr(out)

    target = "llvm -mcpu=cascadelake -num-cores 4"
    dev = tvm.device(target, 0)

    data = np.random.uniform(1, 10, size=(M, K)).astype("uint8")
    weight_np = np.random.uniform(1, 10, size=weight_shape).astype("int8")
    bias_np = np.random.uniform(1, 10, size=(weight_shape[0],)).astype("int32")

    ref = (
        relay.create_executor("vm", mod=relay_mod, device=dev, target=target)
        .evaluate()(*[data, weight_np, bias_np])
        .numpy()
    )

    params = {"weight": weight_np, "bias": bias_np}

    if do_tune:
        extracted_tasks = extract_task_from_relay(relay_mod, target, params)
        # Filter out tasks that we don't intend to schedule / tune with TIR.
        tune_tasks = list(
            filter(
                lambda task: "dense" in task.task_name,
                extracted_tasks,
            )
        )
        config = TuneConfig(
            strategy="replay_trace",
            num_trials_per_iter=64,
            max_trials_per_task=20000,
            max_trials_global=20000,
        )
        with tempfile.TemporaryDirectory() as work_dir:
            # postprocs=lambda: [] is important to prevent default post processors from
            # tampering with the manual schedule.
            database = tune_extracted_tasks(
                tune_tasks,
                config,
                work_dir=work_dir,
                postprocs=lambda: [],
            )
    else:

        def schedule_fn(task, sch):
            if "dense" not in task.task_name:
                return False

            block = sch.get_block("compute")
            # Looks up schedule_rule annotation.
            # See the comment in test_tune_relay_manual_tir_vnni().
            schedule_rule = sch.get(block).annotations["schedule_rule"]
            assert "dense_vnni" in schedule_rule
            schedule_dense(block, M, False, sch)
            return True

        database = apply_fixed_schedules(relay_mod, target, params, schedule_fn)

    with ApplyHistoryBest(database):
        with tvm.transform.PassContext(
            opt_level=3,
            config={"relay.backend.use_meta_schedule": True},
        ):
            # pylint: disable=W0105
            """
            The log should say
            Warning: Cannot find workload: tvmgen_default_fused_expand_dims
            Warning: Cannot find workload: tvmgen_default_fused_cast
            Warning: Cannot find workload: tvmgen_default_fused_cast_1
            Warning: Cannot find workload: tvmgen_default_fused_nn_batch_matmul

            This means batch matmul and others are scheduled by TE, and dense
            (the one not warned) is found in the meta schedule tuning database
            during ApplyHistoryBest
            """
            # pylint: enable=W0105
            lib = relay.build(relay_mod, target=target, params=params)

    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
    runtime.set_input("data", data)
    runtime.run()
    out = runtime.get_output(0).numpy()

    np.testing.assert_equal(out, ref)
def compile_model(self):
    if device == 'vta':
        self.remote = rpc.connect(self.pynq_addr, 9091)
        vta.reconfig_runtime(self.remote)
        vta.program_fpga(self.remote, bitstream=None)
    else:
        self.remote = rpc.LocalSession()

    self.ctx = self.remote.ext_dev(0) if device == 'vta' else self.remote.cpu(0)

    # Load pre-configured AutoTVM schedules
    with autotvm.tophub.context(target):
        # Populate the shape and data type dictionary for ResNet input
        dtype_dict = {'data': 'float32'}
        shape_dict = {'data': (env.BATCH, 3, 224, 224)}

        gluon_model = vision.resnet18_v1(
            pretrained=True, ctx=ctx
        ).features if args.nonsplit else splitnet.resnet18_v1_split(self.id + 1)

        # Measure build start time
        build_start = time.time()

        # Start front end compilation
        mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

        # Update shape and type dictionary
        shape_dict.update({k: v.shape for k, v in params.items()})
        dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

        # Perform quantization in Relay
        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
            relay_prog = relay.quantize.quantize(mod['main'], params=params)

        # Perform graph packing and constant folding for VTA target
        if target.device_name == 'vta':
            assert env.BLOCK_IN == env.BLOCK_OUT
            relay_prog = graph_pack(relay_prog,
                                    env.BATCH,
                                    env.BLOCK_OUT,
                                    env.WGT_WIDTH,
                                    start_name=start_pack,
                                    stop_name=stop_pack)

        # Compile Relay program with AlterOpLayout disabled
        with relay.build_config(opt_level=3, disabled_pass={'AlterOpLayout'}):
            if target.device_name != 'vta':
                graph, lib, params = relay.build(relay_prog,
                                                 target=target,
                                                 params=params,
                                                 target_host=env.target_host)
            else:
                with vta.build_config():
                    graph, lib, params = relay.build(relay_prog,
                                                     target=target,
                                                     params=params,
                                                     target_host=env.target_host)

        self.params = params

        # Measure Relay build time
        build_time = time.time() - build_start
        print(f'inference graph for thread {self.id} built in {build_time:.4f}s!')

    # Send the inference library over to the remote RPC server
    temp = util.tempdir()
    lib.save(temp.relpath('graphlib.o'))
    self.remote.upload(temp.relpath('graphlib.o'))
    lib = self.remote.load_module('graphlib.o')

    # Graph runtime
    self.m = graph_runtime.create(graph, lib, self.ctx)
def compile_tvm_graph_runtime(model, model_name, layout, compute_layout,
                              batch_size, seq_length, dtype, instance_type):
    key = (model_name, layout, compute_layout, batch_size, seq_length, dtype, instance_type)
    if key in _TVM_RT_CACHE:
        return _TVM_RT_CACHE[key]
    flags = get_ec2_tvm_flags()[instance_type]
    tvm = try_import_tvm()
    from tvm import relay
    from tvm.contrib import graph_runtime
    token_ids_shape = (batch_size, seq_length) if layout == 'NT' else (seq_length, batch_size)
    valid_length_shape = (batch_size,)
    if 'bart' in model_name:
        shape_dict = {
            'data0': token_ids_shape,
            'data1': valid_length_shape,
            'data2': token_ids_shape,
            'data3': valid_length_shape,
        }
        dtype_dict = {
            'data0': 'int32',
            'data1': 'int32',
            'data2': 'int32',
            'data3': 'int32',
        }
    elif 'roberta' in model_name or 'xlmr' in model_name:
        shape_dict = {
            'data0': token_ids_shape,
            'data1': valid_length_shape,
        }
        dtype_dict = {
            'data0': 'int32',
            'data1': 'int32',
        }
    else:
        shape_dict = {
            'data0': token_ids_shape,
            'data1': token_ids_shape,
            'data2': valid_length_shape,
        }
        dtype_dict = {
            'data0': 'int32',
            'data1': 'int32',
            'data2': 'int32'
        }
    sym = model._cached_graph[1]
    params = {}
    for k, v in model.collect_params().items():
        params[v._var_name] = tvm.nd.array(v.data().asnumpy())
    mod, params = relay.frontend.from_mxnet(sym, shape=shape_dict,
                                            dtype=dtype_dict, arg_params=params)
    target = flags['target']
    use_gpu = flags['use_gpu']
    opt_level = flags['opt_level']
    required_pass = flags['required_pass']
    with tvm.transform.PassContext(opt_level=opt_level, required_pass=required_pass):
        lib = relay.build(mod, target, params=params)
    if use_gpu:
        ctx = tvm.gpu()
    else:
        ctx = tvm.cpu()
    rt = graph_runtime.GraphModule(lib["default"](ctx))
    _TVM_RT_CACHE[key] = rt
    return rt
def verify_model(model_name, input_data=[], custom_convert_map={}, ctx_list=ctx_list()):
    """Assert that the output of a compiled model matches with that of its baseline."""
    if isinstance(model_name, str):
        baseline_model, baseline_input = load_model(model_name)
    elif isinstance(input_data, list):
        baseline_model = model_name
        baseline_input = input_data
    elif isinstance(input_data, torch.Tensor) or len(input_data.shape) == 0:
        baseline_model = model_name
        baseline_input = [input_data]
    else:
        assert False, "Unexpected input format"

    if torch.cuda.is_available():
        baseline_model = baseline_model.cuda()
        baseline_input = [inp.cuda() for inp in baseline_input]

    with torch.no_grad():
        baseline_outputs = baseline_model(*baseline_input)

    if isinstance(baseline_outputs, tuple):
        baseline_outputs = tuple(out.cpu().numpy() for out in baseline_outputs)
    else:
        baseline_outputs = (baseline_outputs.float().cpu().numpy(),)

    trace = torch.jit.trace(baseline_model, baseline_input).float().eval()
    if torch.cuda.is_available():
        trace = trace.cuda()
    else:
        trace = trace.cpu()

    input_names = get_graph_input_names(trace)
    input_shapes = dict(zip(input_names, [inp.shape for inp in baseline_input]))
    mod, params = relay.frontend.from_pytorch(trace, input_shapes, custom_convert_map)
    compiled_input = dict(zip(input_names, [inp.cpu().numpy() for inp in baseline_input]))

    with relay.build_config(opt_level=3):
        for target, ctx in ctx_list:
            relay_graph, relay_lib, relay_params = relay.build(mod, target=target, params=params)
            relay_model = graph_runtime.create(relay_graph, relay_lib, ctx)
            relay_model.set_input(**relay_params)
            for name, inp in compiled_input.items():
                relay_model.set_input(name, inp)
            relay_model.run()

            for i, baseline_output in enumerate(baseline_outputs):
                compiled_output = relay_model.get_output(i).asnumpy()
                assert_shapes_match(baseline_output, compiled_output)
                tvm.testing.assert_allclose(baseline_output, compiled_output,
                                            rtol=1e-3, atol=1e-3)

    del model_name
    del baseline_model
    torch.cuda.empty_cache()
def run_unpropagatable_graph(dev, tgt):
    R""" The network is as follows:

            a  b  c  d
             \ /  \ /
             add  mul
               \  /
             subtract
    """
    a = relay.var("a", shape=(10, 10))
    b = relay.var("b", shape=(10, 10))
    c = relay.var("c", shape=(10, 10))
    d = relay.var("d", shape=(10, 10))
    a_data = np.random.rand(10, 10).astype('float32')
    b_data = np.random.rand(10, 10).astype('float32')
    c_data = np.random.rand(10, 10).astype('float32')
    d_data = np.random.rand(10, 10).astype('float32')
    tmp_add = a_data + b_data
    tmp_mul = np.multiply(c_data, d_data)
    ref_res = np.subtract(tmp_add, tmp_mul)

    fallback_device = tvm.context("cpu")
    target = {"cpu": "llvm", dev: tgt}
    cpu_ctx = fallback_device
    dev_ctx = tvm.context(dev)

    def annotated():
        add = relay.add(a, b)
        _add = relay.annotation.on_device(add, dev_ctx)
        mul = relay.multiply(c, d)
        _mul = relay.annotation.on_device(mul, cpu_ctx)
        sub = relay.subtract(add, mul)
        _sub = relay.annotation.on_device(sub, dev_ctx)
        func = relay.Function([a, b, c, d],
                              relay.Tuple(tvm.convert([_add, _mul, _sub, sub])))
        func = relay.ir_pass.infer_type(func)
        func = relay.ir_pass.rewrite_annotated_ops(func, dev_ctx.device_type)
        func = relay.ir_pass.infer_type(func)
        return relay.Function(relay.ir_pass.free_vars(func.body[3]), func.body[3])

    def expected():
        add = relay.add(a, b)
        mul = relay.multiply(c, d)
        copy_mul_sub = relay.device_copy(mul, cpu_ctx, dev_ctx)
        sub = relay.subtract(add, copy_mul_sub)
        func = relay.Function([a, b, c, d], sub)
        return func

    annotated_func = annotated()
    expected_func = expected()
    expected_index = [2, 2, 2, 1, 1, 1, 2, 2]
    check_annotated_graph(annotated_func, expected_func)
    params = {"a": a_data, "b": b_data, "c": c_data, "d": d_data}
    config = {"opt_level": 0}
    config["fallback_device"] = fallback_device
    with relay.build_config(**config):
        graph, lib, params = relay.build(annotated_func, target, params=params)
    contexts = [tvm.cpu(0), tvm.context(dev)]
    graph_json = json.loads(graph)
    if "device_index" in graph_json["attrs"]:
        device_index = graph_json["attrs"]["device_index"][1]
        assert device_index == expected_index
    mod = graph_runtime.create(graph, lib, contexts)
    mod.set_input(**params)
    mod.run()
    res = mod.get_output(0).asnumpy()
    tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
# If we run the example on our x86 server for demonstration, we can simply
# set it as :code:`llvm`. If running it on the Raspberry Pi, we need to
# specify its instruction set. Set :code:`local_demo` to False if you want
# to run this tutorial with a real device.

local_demo = True

if local_demo:
    target = tvm.target.create('llvm')
else:
    target = tvm.target.arm_cpu('rasp3b')
    # The above line is a simple form of
    # target = tvm.target.create('llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon')

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(func, target, params=params)

# After `relay.build`, you will get three return values: the graph, the
# library, and the new parameters, since we do some optimization that will
# change the parameters but keep the result of the model the same.

# Save the library at a local temporary directory.
tmp = util.tempdir()
lib_fname = tmp.relpath('net.tar')
lib.export_library(lib_fname)

######################################################################
# Deploy the Model Remotely by RPC
# --------------------------------
# With RPC, you can deploy the model remotely from your host machine
# to the remote device.
def compile_model(
    mod,
    params,
    target,
    dump_code=None,
    target_host=None,
    tuning_records=None,
    alter_layout=None,
    disabled_pass=None,
):
    """Compile a model from a supported framework into a TVM module.

    This function takes a union of the arguments of both frontends.load_model
    and compiler.compile_relay. The resulting TVM module can be executed using
    the graph executor.

    Parameters
    ----------
    mod : IRModule
        The relay module to be compiled.
    params : dict
        A dictionary containing the module's parameters.
    target : str
        The target for which to compile. Can be a plain string or a path.
    dump_code : list, optional
        Dump the generated code for the specified source types, on
        the requested target.
    target_host : str, optional
        The target of the host machine if host-side code needs to be generated.
    tuning_records : str, optional
        Path to the file produced by the tuning to be used during compilation.
    alter_layout : str, optional
        The layout to convert the graph to. Note, the convert layout pass
        doesn't currently guarantee the whole of the graph will be converted
        to the chosen layout.
    disabled_pass : str, optional
        Comma-separated list of passes which need to be disabled
        during compilation.

    Returns
    -------
    graph : str
        A JSON-serialized TVM execution graph.
    lib : tvm.module.Module
        A TVM module containing the compiled functions.
    params : dict
        The parameters (weights) for the TVM module.
    dumps : dict
        Dictionary containing the dumps specified.
    """
    dump_code = [x.strip() for x in dump_code.split(",")] if dump_code else None
    config = {}

    if alter_layout:
        mod = common.convert_graph_layout(mod, alter_layout)

    tvm_target, extra_targets = common.target_from_cli(target)
    target_host = tvm_target if not target_host else target_host
    tvm_target, target_host = Target.check_and_update_host_consist(tvm_target, target_host)

    for codegen_from_cli in extra_targets:
        codegen = composite_target.get_codegen_by_target(codegen_from_cli["name"])
        partition_function = codegen["pass_pipeline"]
        mod = partition_function(mod, params, **codegen_from_cli["opts"])
        if codegen["config_key"] is not None:
            config[codegen["config_key"]] = codegen_from_cli["opts"]

    if tuning_records and os.path.exists(tuning_records):
        logger.debug("tuning records file provided: %s", tuning_records)

        use_autoscheduler = True
        try:
            auto_scheduler.load_records(tuning_records)
        except tvm._ffi.base.TVMError:
            use_autoscheduler = False

        if use_autoscheduler:
            with auto_scheduler.ApplyHistoryBest(tuning_records):
                config["relay.backend.use_auto_scheduler"] = True
                with tvm.transform.PassContext(opt_level=3, config=config,
                                               disabled_pass=disabled_pass):
                    logger.debug("building relay graph with autoscheduler")
                    graph_module = relay.build(mod, target=tvm_target, params=params)
        else:
            with autotvm.apply_history_best(tuning_records):
                with tvm.transform.PassContext(opt_level=3, config=config,
                                               disabled_pass=disabled_pass):
                    logger.debug("building relay graph with tuning records")
                    graph_module = relay.build(mod, tvm_target, params=params)
    else:
        with tvm.transform.PassContext(opt_level=3, config=config,
                                       disabled_pass=disabled_pass):
            logger.debug("building relay graph (no tuning records provided)")
            graph_module = relay.build(mod, tvm_target, params=params)

    # Generate output dump files with sources
    dump_code = dump_code or []
    dumps = {}
    for source_type in dump_code:
        lib = graph_module.get_lib()
        # TODO lib.get_source calls have inconsistent behavior for unsupported
        #      formats (@leandron).
        source = str(mod) if source_type == "relay" else lib.get_source(source_type)
        dumps[source_type] = source

    # TODO we need to update this return to use the updated graph module APIs
    #      as these getter functions will be deprecated in the next release (@leandron)
    return graph_module.get_json(), graph_module.get_lib(), graph_module.get_params(), dumps
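# Usage sketch (an assumption, not part of the original module): load a model
# through the tvmc frontends and compile it for a local LLVM target. The
# `frontends.load_model` helper and the "resnet50.onnx" path are illustrative;
# substitute whatever loader and model file your setup actually uses.
mod, params = frontends.load_model("resnet50.onnx")
graph, lib, params, dumps = compile_model(mod, params, target="llvm")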
        env.BATCH,
        env.BLOCK_OUT,
        env.WGT_WIDTH,
        start_name=pack_dict[model][0],
        stop_name=pack_dict[model][1],
        device_annot=(env.TARGET == "intelfocl"),
    )
else:
    relay_prog = mod["main"]

# Compile Relay program with AlterOpLayout disabled
if target.device_name != "vta":
    with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
        graph, lib, params = relay.build(
            relay_prog,
            target=tvm.target.Target(target, host=env.target_host),
            params=params)
else:
    if env.TARGET == "intelfocl":
        # multiple targets to run both on cpu and vta
        target = {"cpu": env.target_vta_cpu, "ext_dev": target}
    with vta.build_config(opt_level=3,
                          disabled_pass={"AlterOpLayout", "tir.CommonSubexprElimTIR"}):
        graph, lib, params = relay.build(
            relay_prog,
            target=tvm.target.Target(target, host=env.target_host),
            params=params)

# Measure Relay build time
build_time = time.time() - build_start
def test_meta_schedule_relay_lowering():
    data_shape = (1, 3, 16, 16)
    weight_shape = (8, 3, 5, 5)
    data = relay.var("data", relay.TensorType(data_shape, "float32"))
    weight = relay.var("weight", relay.TensorType(weight_shape, "float32"))
    y = relay.nn.conv2d(
        data,
        weight,
        padding=(2, 2),
        kernel_size=(5, 5),
        kernel_layout="OIHW",
        out_dtype="float32",
    )
    f = relay.Function([data, weight], y)
    mod = tvm.IRModule.from_expr(f)
    mod = relay.transform.InferType()(mod)

    data_sample = np.random.rand(*data_shape).astype("float32")
    weight_sample = np.random.rand(*weight_shape).astype("float32")
    params = {mod["main"].params[1].name_hint: weight_sample}

    input_name = "data"
    dev = tvm.cpu()
    target = Target("llvm --num-cores=16")
    data = tvm.nd.array(data_sample, dev)

    with tempfile.TemporaryDirectory() as work_dir:
        database = JSONDatabase(
            osp.join(work_dir, "workload.json"),
            osp.join(work_dir, "records.json"),
        )
        database.commit_tuning_record(
            TuningRecord(
                Trace([], {}),
                [0.0],
                database.commit_workload(tvmgen_default_fused_nn_contrib_conv2d_NCHWc),
                target=target,
                args_info=[],
            )
        )
        with ApplyHistoryBest(database):
            with tvm.transform.PassContext(
                opt_level=3,
                config={"relay.backend.use_meta_schedule": True},
            ):
                rt_mod1 = relay.build(mod, target=target, params=params)

        # Compile without meta-scheduler for correctness check
        with tvm.transform.PassContext(opt_level=0):
            rt_mod2 = relay.build(mod, target=target, params=params)

        def get_output(data, lib):
            module = graph_executor.GraphModule(lib["default"](dev))
            module.set_input(input_name, data)
            module.run()
            return module.get_output(0).numpy()

        # Check correctness
        actual_output = get_output(data, rt_mod1)
        expected_output = get_output(data, rt_mod2)
        assert np.allclose(actual_output, expected_output, rtol=1e-4, atol=2e-4)
def test_compile_return_empty_tuple():
    x = relay.var("x", shape=[16], dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([x], relay.Tuple([])))
    graph, lib, _ = relay.build(mod, "llvm")
    mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
    mod.run()
for i in range(num_layers + 1):
    params["layers.%d.weight" % (i)] = model_params["layers.%d.weight" % (i)]
    params["layers.%d.bias" % (i)] = model_params["layers.%d.bias" % (i)]

# Set the TVM build target
target = "llvm"  # Currently only support `llvm` as target

func = relay.Function(relay.analysis.free_vars(output), output)
func = relay.build_module.bind_params_by_name(func, params)
mod = tvm.IRModule()
mod["main"] = func

# Build with Relay
with tvm.transform.PassContext(opt_level=0):  # Currently only support opt_level=0
    lib = relay.build(mod, target, params=params)

# Generate graph runtime
dev = tvm.device(target, 0)
m = graph_runtime.GraphModule(lib["default"](dev))

######################################################################
# Run the TVM model, test for accuracy and verify with DGL
# --------------------------------------------------------
m.run()
logits_tvm = m.get_output(0).asnumpy()
print("Print the first five outputs from TVM execution\n", logits_tvm[:5])

labels = data.labels
test_mask = data.test_mask
def test_tvm_integration(model_name, batch_size, seq_length, layout, ctx):
    tvm = try_import_tvm()
    from tvm import relay
    from tvm.contrib import graph_runtime
    tvm_recommended_flags = get_ec2_tvm_flags()
    if ctx.device_type == 'gpu':
        flags = tvm_recommended_flags['g4']
    elif ctx.device_type == 'cpu':
        flags = tvm_recommended_flags['c4']
        if model_name != 'google_albert_base_v2':
            # Skip all other tests
            return
    else:
        raise NotImplementedError
    with tempfile.TemporaryDirectory() as root, ctx:
        model_cls, cfg, tokenizer, backbone_param_path, _ = get_backbone(model_name, root=root)
        cfg.defrost()
        cfg.MODEL.layout = layout
        cfg.freeze()
        model = model_cls.from_cfg(cfg)
        model.load_parameters(backbone_param_path)
        model.hybridize()
        if layout == 'NT':
            token_ids = mx.np.random.randint(0, cfg.MODEL.vocab_size,
                                             (batch_size, seq_length), dtype=np.int32)
            token_types = mx.np.random.randint(0, 2, (batch_size, seq_length), dtype=np.int32)
            valid_length = mx.np.random.randint(seq_length // 2, seq_length,
                                                (batch_size,), dtype=np.int32)
        else:
            token_ids = mx.np.random.randint(0, cfg.MODEL.vocab_size,
                                             (seq_length, batch_size), dtype=np.int32)
            token_types = mx.np.random.randint(0, 2, (seq_length, batch_size), dtype=np.int32)
            valid_length = mx.np.random.randint(seq_length // 2, seq_length,
                                                (batch_size,), dtype=np.int32)
        if 'bart' in model_name:
            mx_out = model(token_ids, valid_length, token_ids, valid_length)
            shape_dict = {
                'data0': token_ids.shape,
                'data1': valid_length.shape,
                'data2': token_ids.shape,
                'data3': valid_length.shape,
            }
            dtype_dict = {
                'data0': token_ids.dtype.name,
                'data1': valid_length.dtype.name,
                'data2': token_ids.dtype.name,
                'data3': valid_length.dtype.name,
            }
        elif 'roberta' in model_name or 'xlmr' in model_name:
            mx_out = model(token_ids, valid_length)
            shape_dict = {
                'data0': token_ids.shape,
                'data1': valid_length.shape,
            }
            dtype_dict = {
                'data0': token_ids.dtype.name,
                'data1': valid_length.dtype.name,
            }
        else:
            mx_out = model(token_ids, token_types, valid_length)
            shape_dict = {
                'data0': token_ids.shape,
                'data1': token_types.shape,
                'data2': valid_length.shape
            }
            dtype_dict = {
                'data0': token_ids.dtype.name,
                'data1': token_types.dtype.name,
                'data2': valid_length.dtype.name
            }
        sym = model._cached_graph[1]
        params = {}
        for k, v in model.collect_params().items():
            params[v._var_name] = tvm.nd.array(v.data().asnumpy())
        mod, params = relay.frontend.from_mxnet(sym, shape=shape_dict,
                                                dtype=dtype_dict, arg_params=params)
        target = flags['target']
        use_gpu = flags['use_gpu']
        opt_level = flags['opt_level']
        required_pass = flags['required_pass']
        with tvm.transform.PassContext(opt_level=opt_level, required_pass=required_pass):
            lib = relay.build(mod, target, params=params)
        if use_gpu:
            ctx = tvm.gpu()
        else:
            ctx = tvm.cpu()
        rt = graph_runtime.GraphModule(lib["default"](ctx))
        if 'bart' in model_name:
            rt.set_input(data0=token_ids, data1=valid_length,
                         data2=token_ids, data3=valid_length)
        elif 'roberta' in model_name:
            rt.set_input(data0=token_ids, data1=valid_length)
        else:
            rt.set_input(data0=token_ids, data1=token_types, data2=valid_length)
        rt.run()
        for i in range(rt.get_num_outputs()):
            out = rt.get_output(i)
            if rt.get_num_outputs() == 1:
                mx_out_gt = mx_out.asnumpy()
            else:
                mx_out_gt = mx_out[i].asnumpy()
            npt.assert_allclose(out.asnumpy(), mx_out_gt, rtol=1e-3, atol=1e-1)
mod["main"], env.BATCH, env.BLOCK_OUT, env.WGT_WIDTH, start_name=pack_dict[MODEL_NAME][0], stop_name=pack_dict[MODEL_NAME][1], start_name_idx=pack_dict[MODEL_NAME][2], stop_name_idx=pack_dict[MODEL_NAME][3]) else: mod = mod["main"] # Compile Relay program with AlterOpLayout disabled with vta.build_config(disabled_pass={"AlterOpLayout"}): graph, lib, params = relay.build( mod, target=target, params=params, target_host=env.target_host) # Measure Relay build time build_time = time.time() - build_start print(MODEL_NAME + " inference graph built in {0:.2f}s!".format(build_time)) # Send the inference library over to the remote RPC server temp = util.tempdir() lib.save(temp.relpath("graphlib.o")) remote.upload(temp.relpath("graphlib.o")) lib = remote.load_module("graphlib.o") # Graph runtime m = graph_runtime.create(graph, lib, ctx)
    relay.transform.ConvertLayout(desired_layouts)
])
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)

# Build the module against ARM CPU
tvm_target = get_tvm_target(device, get_device_type(), get_device_arch(),
                            get_device_attributes())
cpu_target = "llvm"
tvm_targets = tvm.target.Target(tvm_target, host=cpu_target)
cpudevice = tvm.runtime.cpu()

with tvm.transform.PassContext(opt_level=3):
    graph_mod = relay.build(mod, tvm_targets, params=params)

lib = graph_mod.get_lib()
params = graph_mod.get_params()

# Create a runtime executor module
module = graph_executor.GraphModule(graph_mod["default"](cpudevice))
# Feed input data
module.set_input(input_tensor, tvm.nd.array(image_data))
# Feed related params
module.set_input(**params)

ftimer = module.module.time_evaluator("run", cpudevice, number=1, repeat=10)
prof_res = np.array(
def tune_network(network, target):
    # Extract tasks
    mod, params = get_network(network)
    target = tvm.target.Target(target)
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        # Tuning
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(timeout=60)
        tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=100,
            num_measures_per_round=2,
            early_stopping=1,
            runner=measure_ctx.runner,
            builder=auto_scheduler.LocalBuilder(timeout=60),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        tuner.tune(tune_option, search_policy="sketch.random")
        del measure_ctx

        # Compile with the history best
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)

        # Sample a schedule when missing
        with auto_scheduler.ApplyHistoryBestOrSample(None, num_measure=2):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib2 = relay.build(mod, target=target, params=params)

        # Compile without auto-scheduler and any other optimization for correctness check
        with tvm.transform.PassContext(opt_level=0):
            ref_lib = relay.build(mod, target=target, params=params)

        # Check the correctness
        def get_output(data, lib):
            dev = tvm.gpu()
            module = graph_executor.GraphModule(lib["default"](dev))
            module.set_input("data", data)
            module.run()
            return module.get_output(0).asnumpy()

        np.random.seed(0)
        if network == "mlp":
            data = np.random.uniform(size=(1, 32))
        elif network == "winograd-test":
            data = np.random.uniform(size=(1, 23, 40, 32))
        else:
            raise ValueError("Unknown network: " + network)

        actual_output1 = get_output(data, lib)
        actual_output2 = get_output(data, lib2)
        expected_output = get_output(data, ref_lib)

        tvm.testing.assert_allclose(actual_output1, expected_output, rtol=1e-4, atol=1e-4)
        tvm.testing.assert_allclose(actual_output2, expected_output, rtol=1e-4, atol=1e-4)
def build(target):
    mod, params = relay.frontend.from_mxnet(block, {"data": dshape})
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target, params=params)
    return lib
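# Usage sketch (an assumption, not part of the original snippet): run the
# library returned by build() through the graph executor. `block` and `dshape`
# are assumed to be defined in the enclosing scope, as above; the input name
# "data" matches the shape dict passed to from_mxnet.
import numpy as np
import tvm
from tvm.contrib import graph_executor

lib = build("llvm")
dev = tvm.device("llvm", 0)
m = graph_executor.GraphModule(lib["default"](dev))
m.set_input("data", tvm.nd.array(np.random.uniform(size=dshape).astype("float32")))
m.run()
out = m.get_output(0)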
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', required=True, help='Test data path')
    parser.add_argument('-t', '--target', required=True, help='Target device for inference')
    parser.add_argument('-m', '--max', type=int, default=100,
                        help='Maximum number of images to retrieve')
    args = parser.parse_args()
    argv_test_data_path = args.path
    argv_target = args.target
    argv_max = args.max
    print(argv_test_data_path)
    print(argv_target)
    print(argv_max)

    # download pre-trained model from mxnet model_zoo
    block = vision.get_model('MobileNet1.0', pretrained=True)

    # ImageNet Label
    # Synset for converting the number of ImageNet classes to human vocabulary
    synset_path = "./imagenet1000_clsid_to_human.txt"
    with open(synset_path) as f:
        # text_labels = [' '.join(l.split()[1:]) for l in f]
        text_labels = eval(f.read())

    get_test_data(argv_test_data_path, argv_max)

    print('Relay: get model from mxnet...')
    img_ = transform_image_np(img_list[0])
    print('img', img_.shape, 'type: ', type(img_))
    shape_dict = {'data': img_.shape}
    print('Block: {0}, Dict_shape: {1}'.format(type(block), type(shape_dict)))
    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    print('Mod: {0}, Params: {1}'.format(type(mod), type(params)))

    func = mod['main']
    func = relay.Function(func.params, relay.nn.softmax(func.body), None,
                          func.type_params, func.attrs)

    print("Relay: build the graph")
    # target = 'llvm'
    if argv_target == 'llvm':
        target = tvm.target.create('llvm')
        ctx = tvm.cpu(0)
    elif argv_target == 'cuda':
        target = tvm.target.create('cuda')
        ctx = tvm.gpu(0)
    else:
        target = argv_target

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)
    print("Graph: {0}, lib: {1}, params: {2}".format(type(graph), type(lib), type(params)))

    print('Tvm: run the graph')
    dtype = 'float32'
    m = graph_runtime.create(graph, lib, ctx)

    print('Input the img')
    start_time_tvm = time.time()
    prob_avg = 0
    count = 0
    for img_ in img_list:
        count += 1
        m.set_input('data', tvm.nd.array(transform_image_np(img_).astype(dtype)))
        m.set_input(**params)
        m.run()
        tvm_output = m.get_output(0)
        tvm_output = tvm_output.asnumpy()[0]
        idx = np.argsort(tvm_output)[-3:][::-1]
        # print('With prob = %.5f, it contains %s' % (tvm_output[idx[0]], text_labels[idx[0]]))
        prob_avg += tvm_output[idx[0]]

    print('Average accuracy = %0.5f' % float(prob_avg / count))
    print('Cost of time: %.5f sec' % (time.time() - start_time_tvm))
sym = mx.sym.load("%s/%s/ssd_resnet50_inference.json" % (model_dir, inference_symbol_folder))
_, arg_params, aux_params = load_checkpoint("%s/%s" % (model_dir, model_name), 0)

import argparse
parser = argparse.ArgumentParser()
parser.add_argument(
    "-f", "--frontend",
    help="Frontend for compilation, nnvm or relay",
    type=str,
    default="nnvm")
args = parser.parse_args()
if args.frontend == "relay":
    net, params = relay.frontend.from_mxnet(sym, {"data": dshape},
                                            arg_params=arg_params,
                                            aux_params=aux_params)
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net, target, params=params)
elif args.frontend == "nnvm":
    net, params = from_mxnet(sym, arg_params, aux_params)
    with compiler.build_config(opt_level=3):
        graph, lib, params = compiler.build(
            net, target, {"data": dshape}, params=params)
else:
    parser.print_help()
    parser.exit()

######################################################################
# Create TVM runtime and do inference

# Preprocess image
image = cv2.imread(test_image_path)
img_data = cv2.resize(image, (dshape[2], dshape[3]))
def tune_and_evaluate(tuning_opt):
    # Register VTA tuning tasks
    register_vta_tuning_tasks()

    # Perform task extraction on Relay program
    print("Extract tasks...")
    relay_prog, params = compile_network(env, target, network, start_pack, stop_pack)
    mod = tvm.IRModule.from_expr(relay_prog)
    tasks = autotvm.task.extract_from_program(
        mod,
        params=params,
        ops=(relay.op.get("nn.conv2d"),),
        target=target,
        target_host=env.target_host,
    )

    # filter out non-packed conv2d task
    tasks = list(filter(lambda t: len(t.args[0][1]) > 4, tasks))

    # We should have extracted 10 convolution tasks
    assert len(tasks) == 10
    print("Extracted {} conv2d tasks:".format(len(tasks)))
    for tsk in tasks:
        inp = tsk.args[0][1]
        wgt = tsk.args[1][1]
        batch = inp[0] * inp[4]
        in_filter = inp[1] * inp[5]
        out_filter = wgt[0] * wgt[4]
        height, width = inp[2], inp[3]
        hkernel, wkernel = wgt[2], wgt[3]
        hstride, wstride = tsk.args[2][0], tsk.args[2][1]
        hpad, wpad = tsk.args[3][0], tsk.args[3][1]
        print("({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})".format(
            batch, height, width, in_filter, out_filter,
            hkernel, wkernel, hpad, wpad, hstride, wstride,
        ))

    # We do not run the tuning in our webpage server since it takes too long.
    # Comment the following line to run it by yourself.
    return

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # evaluate with tuning history
    if env.TARGET != "sim":
        # Get remote from fleet node
        remote = autotvm.measure.request_remote(env.TARGET, tracker_host,
                                                tracker_port, timeout=10000)
        # Reconfigure the JIT runtime and FPGA.
        vta.reconfig_runtime(remote)
        vta.program_fpga(remote, bitstream=None)
    else:
        # In simulation mode, host the RPC server locally.
        remote = rpc.LocalSession()

    # compile kernels with history best records
    with autotvm.tophub.context(target, extra_files=[log_file]):
        # Compile network
        print("Compile...")
        if target.device_name != "vta":
            with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
                lib = relay.build(relay_prog, target=target,
                                  params=params, target_host=env.target_host)
        else:
            with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
                lib = relay.build(relay_prog, target=target,
                                  params=params, target_host=env.target_host)

        # Export library
        print("Upload...")
        temp = utils.tempdir()
        lib.export_library(temp.relpath("graphlib.tar"))
        remote.upload(temp.relpath("graphlib.tar"))
        lib = remote.load_module("graphlib.tar")

        # Generate the graph runtime
        ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
        m = graph_runtime.GraphModule(lib["default"](ctx))

        # upload parameters to device
        image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 224))).astype("float32"))
        m.set_input("data", image)

        # evaluate
        print("Evaluate inference time cost...")
        timer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
        tcost = timer()
        prof_res = np.array(tcost.results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
def test_compile_tuple_dup():
    x = relay.var("data", shape=(16, 16))
    log = relay.log(x)
    output = relay.Tuple([log, log])
    f = relay.Function([x], output)
    relay.build(f, 'llvm')
tasks, task_weights = auto_scheduler.extract_tasks(
    mod["main"], params, target=target_host, target_host=target_host)

for idx, task in enumerate(tasks):
    print("========== Task %d (workload key: %s) ==========" % (idx, task.workload_key))
    print(task.compute_dag)

run_tuning(tasks, task_weights, log_file)

print("Compile...")
with auto_scheduler.ApplyHistoryBest(log_file):
    with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
        lib = relay.build(mod, target=target, target_host=target_host, params=params)

print("Upload")
tmp = tempdir()
filename = "net.tar"
lib.export_library(tmp.relpath(filename))
remote = auto_scheduler.utils.request_remote("m1", "127.0.0.1", 9190)
remote.upload(tmp.relpath(filename))
rlib = remote.load_module(filename)

print("run")
input_shape = [1, 128]
dtype = "int64"
ctx = remote.device(str(target), 0)
module = runtime.graph_executor.GraphModule(rlib["default"](ctx))
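# `run_tuning` is called above but not defined in this excerpt. A minimal
# sketch of what it usually looks like in the auto-scheduler tutorials (trial
# count and runner options are assumptions):
def run_tuning(tasks, task_weights, log_file):
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # raise this for better performance
        runner=auto_scheduler.RPCRunner(
            "m1", host="127.0.0.1", port=9190, repeat=1, min_repeat_ms=300),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)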
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        runner=auto_scheduler.RPCRunner(
            device_key,
            host="0.0.0.0",
            port=9191,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)

    # Compile with the history best
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
            lib = relay.build(mod, target=target, params=params)

    # Export library
    tmp = tempdir()
    if use_ndk:
        from tvm.contrib import ndk
        filename = "net.so"
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "net.tar"
        lib.export_library(tmp.relpath(filename))

    # Upload module to device
    print("Upload...")
    remote = auto_scheduler.utils.request_remote(device_key, "0.0.0.0", 9191, timeout=10000)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    # Create graph runtime
    dev = remote.cpu()
    module = graph_runtime.GraphModule(rlib["default"](dev))
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)"
          % (np.mean(prof_res), np.std(prof_res)))
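# The free names used above (tasks, task_weights, mod, params, device_key,
# log_file, use_ndk, input_shape, dtype) are assumed to be set up earlier in
# the full script. Illustrative values only (assumptions, not from the source):
device_key = "android"    # RPC key the device registered with the tracker
log_file = "network.json" # auto-scheduler tuning records
use_ndk = True            # build the shared library with the Android NDK
tune_and_evaluate()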
input_image = input_image.transpose([3, 2, 0, 1])

# Compile the model on Relay
# --------------------------
# We should be familiar with the process by now.
input_tensor = "data"
input_shape = input_image.shape
shape_dict = {input_tensor: input_shape}
print("shape: ", shape_dict)
target = 'llvm'

# Parse the MXNet model and convert it into a Relay computation graph
mod, params = relay.frontend.from_mxnet(model, shape_dict)
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, params=params)

######################################################################
# Execute on TVM
# -------------------
# The process is no different from the other examples.
from tvm.contrib import graph_runtime
ctx = tvm.cpu(0)
m = graph_runtime.create(graph, lib, ctx)
dtype = 'float32'

# Run a single inference
m.set_input("data", tvm.nd.array(input_image.astype(dtype)))
m.set_input(**params)
# set start time
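# The excerpt ends at the timing comment. A hedged sketch of the usual
# continuation (timing with time.time() is an assumption, not from the source):
import time
start = time.time()
m.run()
tvm_output = m.get_output(0)
print("Inference time: %.3f s" % (time.time() - start))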
def check_function(symbol, forward=None, backward=None, grad_input_vars=None,
                   shape=None, dtype=None, in_range=None, values=None,
                   exclude_targets=None, only_targets=None,
                   additional_params=None,
                   numerical_grads=None, numerical_grads_params=None,
                   atol=1e-5, rtol=1e-5, quiet=False):
    """Compute the function and/or its gradients on a random input and raise
    an exception if the result doesn't match the reference implementation.

    Parameters
    ----------
    symbol : nnvm.Symbol
        A symbol representing the output.

    forward : Callable[..., List[numpy.ndarray]], optional
        A reference implementation to compare with.

    backward : Callable[..., List[numpy.ndarray] or Dict[str, numpy.ndarray]], optional
        A reference implementation of gradients. Besides the normal inputs it should also
        accept `head_grads`, a list of gradients of some scalar wrt the outputs (or just a
        single gradient if there is only one output). It should return either a dict mapping
        input variable names to the respective gradients or a list of gradients wrt the
        variables from `grad_input_vars` in exactly the same order (in alphabetical order
        by default).

    grad_input_vars : List[nnvm.Symbol or str], optional
        A list of variables with respect to which the gradients will be computed.
        None (default) means that all input variables will be used, in alphabetical order.

    shape : Dict[nnvm.Symbol or str, Tuple[int]] or Tuple[int], optional
        A dict mapping input variable names to shapes, or just a single shape.
        By default shapes will be inferred from variables' attributes (see the Examples).
        Note that this parameter takes precedence over variables' attributes.

    dtype : Dict[nnvm.Symbol or str, str] or str, optional
        A dict mapping input variable names to dtypes, or just a single dtype.
        By default dtypes will be inferred from variables' attributes (see the Examples).
        If dtypes cannot be inferred for some variables, float32 will be used as a fallback.
        Note that this parameter takes precedence over variables' attributes.

    in_range : Dict[nnvm.Symbol or str, (float, float)] or (float, float), optional
        A dict mapping input variable names to ranges, or just a single range (the same for
        all variables). Input values will be generated from uniform distributions on these
        ranges. `head_grads` can also be assigned a range this way.

    values : Dict[nnvm.Symbol or str, numpy.ndarray], optional
        A dict explicitly providing values for some variables instead of random generation.

    exclude_targets : Set[str], optional
        Skip compiling and running anything for these targets.

    only_targets : Set[str], optional
        Test only for those targets from `ctx_list()` that are also in this set.

    additional_params : dict, optional
        A dict of additional parameters which will be passed to both `forward` and `backward`.

    numerical_grads : bool or 'if_possible', optional
        Whether to additionally check against numerically computed gradients. If 'if_possible'
        or None is passed (which is the default), it will try to create a gradient computation
        graph and then check gradients numerically only if this graph can be created (i.e. if
        there are some operations with unimplemented gradients, it will just issue a warning).
        Checking against numerical gradients is done via the `check_numerical_grads` function.

    numerical_grads_params : dict, optional
        Additional parameters for `check_numerical_grads`.

    atol : float, optional
        Absolute tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.

    rtol : float, optional
        Relative tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.

    quiet : bool, optional
        Don't dump additional information to stdout on failure.

    Examples
    --------
    .. code-block:: python

        x = sym.Variable("x", shape=(1, 2))
        y = sym.Variable("y", shape=(1, 2))

        # check the function and its gradients both numerically and using a reference function
        check_function(x + 2*y,
                       lambda x, y: x + 2*y,
                       lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads})

        # just check gradients numerically
        check_function(x + 2*y, numerical_grads=True)

        # just check the forward computation
        check_function(x + 2*y, lambda x, y: x + 2*y, numerical_grads=False)

        # specifying dtype
        check_function(x + 2*y, lambda x, y: x + 2*y, dtype='float64')

        # dtypes can also be specified during variable creation with dtype codes
        x = sym.Variable("x", dtype=0)
        check_function(x + 1, shape=(2, 2), numerical_grads=True)
    """
    # Validate and preprocess the input params
    if numerical_grads is None and forward is None and backward is None:
        raise ValueError("No reference function was passed to check_function. If you only want to "
                         "check gradients numerically, pass numerical_grads=True explicitly.")

    if numerical_grads is None:
        numerical_grads = 'if_possible'

    if numerical_grads not in [False, True, 'if_possible']:
        raise ValueError("numerical_grads must be a bool or 'if_possible', not {}"
                         .format(numerical_grads))

    if additional_params is None:
        additional_params = {}

    input_vars = symbol.list_input_variables()
    input_dict = {x.attr('name'): x for x in input_vars}

    if grad_input_vars is None:
        grad_input_vars = sorted(input_vars, key=lambda x: x.attr('name'))
    else:
        grad_input_vars = [input_dict[x] if isinstance(x, str) else x for x in grad_input_vars]

    in_range = _dict_var_to_dict_str(in_range)
    values = _dict_var_to_dict_str(values)

    out_len = len(symbol.list_output_names())

    # Infer the output shapes and dtypes, and preprocess the shape and dtype params
    forward_graph, shape, dtype, out_shapes, out_dtypes = \
        infer_shapes_dtypes(nnvm.graph.create(symbol), shape=shape, dtype=dtype,
                            fallback_dtype='float32')

    if not all(out_shapes) or not all(out_dtypes):
        if not quiet:
            print(forward_graph.ir(join_node_attrs=['shape', 'dtype']))
        raise ValueError("Could not infer shapes or dtypes for outputs.\n"
                         "out_shapes = {}\nout_dtypes = {}".format(out_shapes, out_dtypes))

    backward_graph = None

    # If we want gradients, we have to recreate the graph, but now with gradient computations.
    # Note that here we need out_shapes for defining the shape of head grads, so we have to
    # create the graph twice.
    if backward is not None or numerical_grads:
        try:
            head_grads_symbols = [nnvm.symbol.Variable("head_grads_" + str(i),
                                                       shape=out_shapes[i],
                                                       dtype=DTYPE_TO_TCODE[out_dtypes[i]])
                                  for i in range(out_len)]
            grad_symbols = graph_util.gradients([symbol], grad_input_vars,
                                                grad_ys=head_grads_symbols)
            # Sometimes grads do not depend on head_grads, so head_grads does not appear
            # in the variable list; adding it manually prevents this, making things a bit easier.
            backward_graph = \
                nnvm.graph.create(nnvm.symbol.Group([symbol] + grad_symbols + head_grads_symbols))

            backward_graph, shape, dtype, out_shapes, out_dtypes = \
                infer_shapes_dtypes(backward_graph, shape=shape, dtype=dtype,
                                    fallback_dtype='float32')
        except nnvm._base.NNVMError as err:
            if backward is None and numerical_grads == "if_possible":
                logging.warning("Won't check gradients because: %s", str(err).split('\n', 1)[0])
                numerical_grads = False
                backward_graph = None
            else:
                raise

    main_graph = backward_graph if backward_graph is not None else forward_graph

    # Generate random data for inputs (including head_grads)
    np_inputs = {}
    for x in main_graph.symbol.list_input_variables():
        x_name = x.attr('name')
        x_shape = shape[x_name]
        x_dtype = dtype[x_name]
        if values is not None and x_name in values:
            np_inputs[x_name] = values[x_name].astype(x_dtype)
            continue
        low = -1.0
        high = 1.0
        if in_range is not None:
            if isinstance(in_range, dict):
                if x_name in in_range:
                    low = in_range[x_name][0]
                    high = in_range[x_name][1]
            else:
                low = in_range[0]
                high = in_range[1]
        np_inputs[x_name] = np.random.uniform(size=x_shape, low=low, high=high).astype(x_dtype)

    np_inputs_without_head_grads = {k: np_inputs[k] for k in np_inputs
                                    if not k.startswith('head_grads_')}

    nothing_was_done = True

    # Compute and compare the results
    for target, ctx in ctx_list():
        if exclude_targets is not None:
            if target in exclude_targets or str(target) in exclude_targets:
                logging.info("Skipping target = %s, ctx = %s", target, ctx)
                continue
        if only_targets is not None:
            if target not in only_targets and str(target) not in only_targets:
                logging.info("Skipping target = %s, ctx = %s", target, ctx)
                continue

        logging.info("Checking computation on target = %s, ctx = %s", target, ctx)

        debug_stage = None

        try:
            nnvm_res = None

            debug_stage = "compiling"
            main_function = graph_to_function(main_graph, target, ctx)

            # nnvm_res contains the output and gradients (if they are needed)
            debug_stage = "running"
            nnvm_res = main_function(**np_inputs)

            try:
                logging.debug("checking to_relay conversion")
                inputs = np_inputs_without_head_grads.copy()
                func, inputs = to_relay(main_graph, shape, dtype, params=inputs)
                with relay.build_config(opt_level=3):
                    graph, lib, params = relay.build(func, target=target)
                m = graph_runtime.create(graph, lib, ctx)
                m.set_input(**inputs)
                m.set_input(**params)
                m.run()
                for i in range(out_len):
                    relay_out = m.get_output(i).asnumpy()
                    tvm.testing.assert_allclose(nnvm_res[i], relay_out, atol=atol, rtol=rtol)
            except NotImplementedError as err:
                # the NNVM operator is not supported yet
                logging.warning(err)

            if backward_graph is not None:
                grad_var_names = [x.attr('name') for x in grad_input_vars]
                nnvm_grads = {x: v for x, v in zip(grad_var_names, nnvm_res[out_len:])}

            if forward is not None:
                nothing_was_done = False
                debug_stage = "checking forward computation"
                logging.debug(debug_stage)

                params = {}
                params.update(np_inputs_without_head_grads)
                params.update(additional_params)
                numpy_res = forward(**params)

                if isinstance(numpy_res, tuple):
                    numpy_res = list(numpy_res)

                if not isinstance(numpy_res, list):
                    numpy_res = [numpy_res]

                if len(numpy_res) != out_len:
                    raise ValueError("Forward function returned {} values, but "
                                     "the nnvm graph returns {} values"
                                     .format(len(numpy_res), out_len))

                for i in range(out_len):
                    tvm.testing.assert_allclose(nnvm_res[i], numpy_res[i], atol=atol, rtol=rtol)

            if backward is not None:
                nothing_was_done = False
                debug_stage = "checking gradients"
                logging.debug(debug_stage)

                np_head_grads = [np_inputs["head_grads_" + str(i)] for i in range(out_len)]
                if out_len == 1:
                    np_head_grads = np_head_grads[0]

                params = {'head_grads': np_head_grads}
                params.update(np_inputs_without_head_grads)
                params.update(additional_params)

                numpy_grads = backward(**params)

                if not isinstance(numpy_grads, dict):
                    if isinstance(numpy_grads, tuple):
                        numpy_grads = list(numpy_grads)
                    if not isinstance(numpy_grads, list):
                        numpy_grads = [numpy_grads]
                    numpy_grads = {x: v for x, v in zip(grad_var_names, numpy_grads)}
                    if len(numpy_grads) != len(grad_var_names):
                        raise ValueError("The backward function returns a list of gradients which "
                                         "does not contain gradients for these variables: {}"
                                         .format(set(grad_var_names) - set(numpy_grads)))

                for x_name in numpy_grads:
                    tvm.testing.assert_allclose(nnvm_grads[x_name], numpy_grads[x_name],
                                                atol=atol, rtol=rtol)

            if numerical_grads:
                nothing_was_done = False
                debug_stage = "checking gradients numerically"
                logging.debug(debug_stage)

                forward_function = graph_to_function(forward_graph, target, ctx)

                # Since the result may be non-scalar, we have to put another operation on top,
                # so we just multiply by the randomly generated head_grads and then sum everything.
                # This way we can reuse the gradient values which have already been computed.
                def scalar_function(**kwargs):
                    res = forward_function(**kwargs)
                    return np.sum([np.dot(np_inputs['head_grads_' + str(i)].ravel(), res[i].ravel())
                                   for i in range(out_len)])

                if numerical_grads_params is None:
                    numerical_grads_params = {}

                check_numerical_grads(
                    scalar_function,
                    input_values=np_inputs_without_head_grads,
                    grad_values=nnvm_grads,
                    **numerical_grads_params)

        except:
            if not quiet:
                print("\ncheck_function failed while {}, here is the main graph"
                      .format(debug_stage))
                print(main_graph.ir(join_node_attrs=['shape', 'dtype']))
                if nnvm_res is not None:
                    print("Generated inputs:")
                    print(np_inputs)
                    print()
            raise

    if nothing_was_done:
        logging.warning("Nothing was done in check_function. Check ctx_list().")
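# `_dict_var_to_dict_str` is used above but not shown in this excerpt. A
# minimal sketch of what it presumably does (normalize nnvm.Symbol keys to
# their string names, leaving everything else untouched):
def _dict_var_to_dict_str(dictionary):
    if isinstance(dictionary, dict):
        return {key.attr('name') if isinstance(key, nnvm.symbol.Symbol) else key: value
                for key, value in dictionary.items()}
    return dictionary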
def run_unpropagatable_graph(dev, tgt):
    R"""The network is as follows:

        a  b    c  d
         \/      \/
        add     mul
           \    /
          subtract
    """
    a = relay.var("a", shape=(10, 10))
    b = relay.var("b", shape=(10, 10))
    c = relay.var("c", shape=(10, 10))
    d = relay.var("d", shape=(10, 10))
    a_data = np.random.rand(10, 10).astype('float32')
    b_data = np.random.rand(10, 10).astype('float32')
    c_data = np.random.rand(10, 10).astype('float32')
    d_data = np.random.rand(10, 10).astype('float32')
    tmp_add = a_data + b_data
    tmp_mul = np.multiply(c_data, d_data)
    ref_res = np.subtract(tmp_add, tmp_mul)

    fallback_device = tvm.context("cpu")
    target = {"cpu": "llvm", dev: tgt}
    cpu_ctx = fallback_device
    dev_ctx = tvm.context(dev)

    def annotated():
        add = relay.add(a, b)
        _add = relay.annotation.on_device(add, dev_ctx)
        mul = relay.multiply(c, d)
        _mul = relay.annotation.on_device(mul, cpu_ctx)
        sub = relay.subtract(_add, _mul)
        _sub = relay.annotation.on_device(sub, dev_ctx)
        func = relay.Function([a, b, c, d], _sub)
        func = run_opt_pass(func, transform.RewriteAnnotatedOps(dev_ctx.device_type))
        return func

    def expected():
        add = relay.add(a, b)
        mul = relay.multiply(c, d)
        copy_mul_sub = relay.device_copy(mul, cpu_ctx, dev_ctx)
        sub = relay.subtract(add, copy_mul_sub)
        func = relay.Function([a, b, c, d], sub)
        return func

    annotated_func = annotated()
    expected_func = expected()
    expected_index = [2, 2, 2, 1, 1, 1, 2, 2]
    check_annotated_graph(annotated_func, expected_func)
    params = {"a": a_data, "b": b_data, "c": c_data, "d": d_data}
    with tvm.transform.PassContext(
            opt_level=0,
            config={"relay.fallback_device_type": fallback_device.device_type}):
        graph, lib, params = relay.build(annotated_func, target, params=params)
        contexts = [tvm.cpu(0), tvm.context(dev)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_runtime.create(graph, lib, contexts)
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
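# A hedged usage sketch (the device strings are an assumption; in practice this
# helper would be driven by a test wrapper that picks an available device):
if tvm.context("cuda").exist:
    run_unpropagatable_graph("cuda", "cuda")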
def test_inception_like():
    def conv(data):
        y = relay.nn.conv2d(data, relay.var("w"),
                            kernel_size=(3, 3),
                            padding=(1, 1),
                            channels=16)
        return relay.nn.relu(data=y)

    def inception_like(data):
        c0 = conv(data)
        c1 = conv(data)
        return relay.concatenate((c0, c1), axis=1)

    def before(dshape):
        x = relay.var("x", shape=dshape)
        in1 = inception_like(x)
        in2 = inception_like(in1)
        return relay.Function(relay.analysis.free_vars(in2), in2)

    def expected(dshape):
        p0 = relay.var("p0", shape=dshape)
        c = conv(p0)
        f0 = relay.Function(relay.analysis.free_vars(c), c)
        f0 = f0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p01 = relay.var("p01", shape=dshape)
        c = conv(p01)
        f1 = relay.Function(relay.analysis.free_vars(c), c)
        f1 = f1.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p02 = relay.var("p02", shape=dshape)
        p12 = relay.var("p12", shape=dshape)
        concat1 = relay.concatenate((p02, p12), axis=1)
        f_concat1 = relay.Function([p02, p12], concat1)
        f_concat1 = f_concat1.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        dshape2 = (dshape[0], dshape[1] * 2, dshape[2], dshape[3])

        p03 = relay.var("p03", shape=dshape2)
        c = conv(p03)
        f2 = relay.Function(relay.analysis.free_vars(c), c)
        f2 = f2.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p04 = relay.var("p04", shape=dshape2)
        c = conv(p04)
        f3 = relay.Function(relay.analysis.free_vars(c), c)
        f3 = f3.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p05 = relay.var("p05", shape=dshape)
        p15 = relay.var("p15", shape=dshape)
        concat2 = relay.concatenate((p05, p15), axis=1)
        f_concat2 = relay.Function([p05, p15], concat2)
        f_concat2 = f_concat2.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        x = relay.var("x", shape=dshape)
        c1 = relay.Call(f0, [x, relay.var("w1")])
        c2 = relay.Call(f1, [x, relay.var("w2")])
        concat = relay.Call(f_concat1, [c1, c2])
        c3 = relay.Call(f2, [concat, relay.var("w3")])
        c4 = relay.Call(f3, [concat, relay.var("w4")])
        out = relay.Call(f_concat2, [c3, c4])
        return relay.Function(relay.analysis.free_vars(out), out)

    dshape = (1, 16, 64, 64)
    orig = before(dshape)
    fuse0(tvm.IRModule.from_expr(orig))
    m = fuse2(tvm.IRModule.from_expr(orig))
    relay.build(m, "llvm")
    after = run_opt_pass(expected(dshape), transform.InferType())
    assert tvm.ir.structural_equal(m["main"], after)
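# `fuse0` and `fuse2` are used above but not defined in this excerpt. A sketch
# of what such helpers typically look like (assumed, not shown in the source):
def fuse0(mod):
    mod = relay.transform.InferType()(mod)
    return relay.transform.FuseOps(fuse_opt_level=0)(mod)

def fuse2(mod):
    mod = relay.transform.InferType()(mod)
    return relay.transform.FuseOps(fuse_opt_level=2)(mod)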
def test_inception_like():
    def conv(data):
        y = relay.nn.conv2d(data, relay.var("w"),
                            kernel_size=(3, 3),
                            padding=(1, 1),
                            channels=16)
        return relay.nn.relu(data=y)

    def inception_like(data):
        c0 = conv(data)
        c1 = conv(data)
        return relay.concatenate((c0, c1), axis=1)

    def before(dshape):
        x = relay.var("x", shape=dshape)
        in1 = inception_like(x)
        in2 = inception_like(in1)
        return relay.Function(relay.ir_pass.free_vars(in2), in2)

    def expected(dshape):
        p0 = relay.var("p0", shape=dshape)
        c = conv(p0)
        f0 = relay.Function(relay.ir_pass.free_vars(c), c)

        p01 = relay.var("p01", shape=dshape)
        c = conv(p01)
        f1 = relay.Function(relay.ir_pass.free_vars(c), c)

        p02 = relay.var("p02", shape=dshape)
        p12 = relay.var("p12", shape=dshape)
        concat1 = relay.concatenate((p02, p12), axis=1)
        f_concat1 = relay.Function([p02, p12], concat1)

        dshape2 = (dshape[0], dshape[1] * 2, dshape[2], dshape[3])

        p03 = relay.var("p03", shape=dshape2)
        c = conv(p03)
        f2 = relay.Function(relay.ir_pass.free_vars(c), c)

        p04 = relay.var("p04", shape=dshape2)
        c = conv(p04)
        f3 = relay.Function(relay.ir_pass.free_vars(c), c)

        p05 = relay.var("p05", shape=dshape)
        p15 = relay.var("p15", shape=dshape)
        concat2 = relay.concatenate((p05, p15), axis=1)
        f_concat2 = relay.Function([p05, p15], concat2)

        x = relay.var("x", shape=dshape)
        c1 = relay.Call(f0, [x, relay.var("w1")])
        c2 = relay.Call(f1, [x, relay.var("w2")])
        concat = relay.Call(f_concat1, [c1, c2])
        c3 = relay.Call(f2, [concat, relay.var("w3")])
        c4 = relay.Call(f3, [concat, relay.var("w4")])
        out = relay.Call(f_concat2, [c3, c4])
        return relay.Function(relay.ir_pass.free_vars(out), out)

    dshape = (1, 16, 64, 64)
    z = before(dshape)
    z = relay.ir_pass.infer_type(z)
    zz = relay.ir_pass.fuse_ops(z, opt_level=0)
    assert not relay.ir_pass.free_vars(zz)
    zz = relay.ir_pass.fuse_ops(z, opt_level=2)
    relay.build(zz, 'llvm')
    zz = relay.ir_pass.infer_type(zz)
    assert not relay.ir_pass.free_vars(zz)
    after = relay.ir_pass.infer_type(expected(dshape))
    assert relay.ir_pass.alpha_equal(zz, after)
n, h, w, c = 1, 130, 130, 128
o, kc, kh, kw = 128, c, 3, 3
img = relay.var('x', relay.ty.TensorType((n, h, w, c), 'int8'))
knl = relay.var('w', relay.ty.TensorType((kh, kw, o // 16, c // 4, 16, 4), 'int8'))
conv2d_vnni = relay.op.nn.conv2d_vnni(img, knl, strides=1, padding=0)
func = relay.Function([img, knl], conv2d_vnni)

# Total multiply-accumulates divided by 64: each int8 VNNI instruction
# performs 16 lanes x 4 MACs = 64 MACs.
ops = n * (h - kh + 1) * (w - kw + 1) * o * kc * kh * kw / 64

import vnni
import numpy as np

module = tvm.IRModule.from_expr(func)
with tvm.build_config(add_lower_pass=[(1, vnni.vnni_transformation)]):
    graph, module, params = relay.build(func, target='llvm -mcpu=cascadelake')

x_ = tvm.nd.array((np.random.randn(n, h, w, c) * 255).astype('int8'), ctx=tvm.cpu())
w_ = tvm.nd.array((np.random.randn(kh, kw, o // 16, c // 4, 16, 4) * 255).astype('int8'),
                  ctx=tvm.cpu())
y_ = tvm.nd.array((np.random.randn(n, h - kh + 1, w - kw + 1, o) * 255).astype('int32'),
                  ctx=tvm.cpu())

timer = module.time_evaluator(module.entry_name, tvm.cpu(), number=5)
span = timer(x_, w_, y_).mean
print('Exec Time: ', span)
print('%.2f GVNNI/s' % (ops / span / 1e9))

# module = tvm.contrib.graph_runtime.create(graph, module, tvm.cpu())
# module.set_input('x', x_)
# module.set_input('w', w_)
# module.run()
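# Sanity check of the instruction count for the shapes above:
#   ops = 1 * 128 * 128 * 128 * 128 * 3 * 3 / 64
#       = 2,415,919,104 MACs / 64
#       = 37,748,736 VNNI instructions per inference,
# so 'GVNNI/s' reports billions of VNNI dot-product instructions per second.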