def _lower(mod, target, params):
    """Helper to lower VTA properly."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    if hasattr(target, "device_name") and target.device_name == "vta":
        import vta

        with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            mod, _ = relay.optimize(mod, target, params)
            grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
            grc.codegen(mod["main"])
            return

    # Default case: try graph codegen first to extract autotvm tasks.
    # If it fails to compile, fall back to the VM compiler.
    # TODO: Currently the VM compiler is likely to stack overflow for large models.
    try:
        opt_mod, _ = relay.optimize(mod, target, params)
        grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        grc.codegen(opt_mod["main"])
    except tvm.TVMError:
        compiler = relay.vm.VMCompiler()
        if params:
            compiler.set_params(params)
        compiler.lower(mod, target=target)
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute to extract auto_scheduler tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    # Turn off AutoTVM config not found warnings
    old_autotvm_silent = autotvm.GLOBAL_SCOPE.silent
    autotvm.GLOBAL_SCOPE.silent = True

    with transform.PassContext(
        opt_level=3,
        config={
            "relay.backend.use_auto_scheduler": True,
            "relay.backend.disable_compile_engine_cache": True,
        },
        disabled_pass={"AutoSchedulerLayoutRewrite"},
    ):
        try:
            opt_mod, _ = relay.optimize(mod, target, params)
            grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
            grc.codegen(opt_mod["main"])
        except tvm.TVMError:
            print(
                "Get errors with GraphRuntimeCodegen for task extraction. "
                "Fallback to VMCompiler."
            )
            compiler = relay.vm.VMCompiler()
            if params:
                compiler.set_params(params)
            mod = tvm.IRModule.from_expr(mod) if isinstance(mod, relay.Function) else mod
            compiler.lower(mod, target)

    autotvm.GLOBAL_SCOPE.silent = old_autotvm_silent
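# Usage sketch (not from the original file): call_all_topi_funcs is normally
# driven by an extract_tasks-style entry point; the snippet below only shows
# how it could be exercised directly. Assumptions: a TVM build that still
# ships relay.testing and the graph_runtime_codegen backend, an "llvm" target,
# and a small MLP workload as an arbitrary stand-in model.
import tvm
from tvm.relay import testing

mod, params = testing.mlp.get_workload(batch_size=1)
target = tvm.target.Target("llvm")

# Traces TOPI compute calls for task extraction; it does not return a
# deployable module.
call_all_topi_funcs(mod, params, target)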
def test_yxnet_mnist():
    graph = make_mnist_graph()
    _, bd = load_parameters(
        graph,
        "/home/wlt/warehouse/.tmp/ca3d0286d5758697cdef653c1375960a868ac08a/data/params")

    with relay.build_config(opt_level=0):
        func = graph
        func = relay.ir_pass.infer_type(func)
        func = relay.ir_pass.fuse_ops(func, 0)
        # print(relay.Function(relay.ir_pass.free_vars(func), func))
        func = relay.ir_pass.infer_type(func)
        graph_gen = _graph_gen.GraphRuntimeCodegen(mod=None, target='llvm')
        graph_json, lowered_funcs, params = graph_gen.codegen(func)

    dump_sym = './data/yxnet_mnist.symbol'
    dump_params = './data/yxnet_mnist.params'
    with open(dump_sym, 'w') as fout:
        fout.write(graph_json)
    with open(dump_params, "wb") as fo:
        fo.write(relay.save_param_dict(params))

    data = np.load('data.npy')
    executor = relay.create_executor()
    res = executor.evaluate(graph)([data.astype(np.int8)], **bd).asnumpy()
    np.save('/tmp/relay.res', res)
    print(res.flatten()[:100])
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute + schedule to extract tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    with transform.PassContext(opt_level=3):
        opt_mod, _ = relay.optimize(mod, target, params)
        grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        grc.codegen(opt_mod["main"])
def _lower(func, target, params):
    """Helper to lower VTA properly."""
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    if hasattr(target, 'device_name') and target.device_name == "vta":
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            import vta
            with vta.build_config():
                mod, _ = relay.optimize(func, target, params)
                grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
                return grc.codegen(mod["main"])

    # default case
    mod, _ = relay.optimize(func, target, params)
    grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
    return grc.codegen(mod["main"])
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute to extract auto_scheduler tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    # Turn off AutoTVM config not found warnings
    old_autotvm_silent = autotvm.GLOBAL_SCOPE.silent
    autotvm.GLOBAL_SCOPE.silent = True

    with transform.PassContext(opt_level=3,
                               config={"relay.backend.use_auto_scheduler": True}):
        opt_mod, _ = relay.optimize(mod, target, params)
        grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        grc.codegen(opt_mod["main"])

    autotvm.GLOBAL_SCOPE.silent = old_autotvm_silent
def _lower(mod, target, params):
    """Helper to lower VTA properly."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    if hasattr(target, 'device_name') and target.device_name == "vta":
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            import vta
            with vta.build_config():
                mod, _ = relay.optimize(mod, target, params)
                grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
                grc.codegen(mod["main"])

    # default case
    compiler = relay.vm.VMCompiler()
    if params:
        compiler.set_params(params)
    compiler.lower(mod, target=target)
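# Usage sketch (not from the original file): _lower above is an internal
# helper; task extraction is normally triggered through the public AutoTVM
# entry point. Assumptions: an AutoTVM-era TVM build where
# autotvm.task.extract_from_program exists, an "llvm" target, and resnet-18
# from relay.testing as a placeholder workload.
from tvm import autotvm
from tvm.relay import testing

mod, params = testing.resnet.get_workload(num_layers=18, batch_size=1)
# extract_from_program lowers the program (via helpers like _lower) purely to
# collect tunable tasks.
tasks = autotvm.task.extract_from_program(mod["main"], params=params, target="llvm")
print(len(tasks))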
def test_naive():
    data = relay.var("data", relay.TensorType((1, 3, 224, 224), "int8"))
    out = data

    prefix = "_conv_" + 'cv0'
    weight = relay.var(prefix + "_weight", dtype="int8")
    out = relay.nn.conv2d(data, weight,
                          kernel_size=(3, 3), padding=(1, 1), strides=(1, 1),
                          channels=64, out_dtype="int32")
    out = relay.nn.leaky_relu(out, alpha=0.1)

    out = relay.Function(relay.ir_pass.free_vars(out), out)
    graph = out

    with relay.build_config(opt_level=0):
        func = graph
        func = relay.ir_pass.infer_type(func)
        func = relay.ir_pass.fuse_ops(func, 0)
        func = relay.ir_pass.infer_type(func)
        graph_gen = _graph_gen.GraphRuntimeCodegen(mod=None, target='llvm')
        graph_json, lowered_funcs, params = graph_gen.codegen(func)
    print(graph_json)
def _run_tvm(data, proto_file, blob_file):
    """Run a Caffe model with TVM, given its .prototxt and .caffemodel files."""
    init_net = pb.NetParameter()
    predict_net = pb.NetParameter()

    # Load the network definition.
    with open(proto_file, "r") as f:
        text_format.Merge(f.read(), predict_net)
    # Load the trained blobs.
    with open(blob_file, "rb") as f:
        init_net.ParseFromString(f.read())

    shape_dict = dict()
    dtype_dict = dict()
    if isinstance(data, (tuple, list)):
        for idx, d in enumerate(data):
            shape_dict["data" + str(idx)] = d.shape
            dtype_dict["data" + str(idx)] = "float32"
    else:
        shape_dict = {"data": data.shape}
        dtype_dict = {"data": "float32"}

    net, params = relay.frontend.from_caffe(init_net, predict_net, shape_dict, dtype_dict)

    tg = "dpu"
    with relay.build_config(opt_level=0):
        mod, _ = relay.optimize(net, tg, params)
    # Alternatives:
    # lib = relay.build(mod, target=target, target_host=target_host, params=params)
    # graph, lib, params = relay.build_module.build(mod, target=tg, params=params)
    graph0, func0, params0 = graph_runtime_codegen.GraphRuntimeCodegen(None, tg).codegen(mod["main"])

    dtype = "float32"
    func = tvm.build(func0, tg, name="default_function")
    with open('/home/wangjj/wujq/test/alexnetCode.c', 'w') as f:
        print(func.get_source(), file=f)
simple_net = relay.nn.bias_add(data=simple_net, bias=bias6)
simple_net = relay.nn.relu(data=simple_net)
simple_net = relay.nn.dense(data=simple_net, weight=conv7_weight, units=1000)
simple_net = relay.nn.bias_add(data=simple_net, bias=bias7)
simple_net = relay.nn.relu(data=simple_net)
simple_net = relay.nn.dense(data=simple_net, weight=conv8_weight, units=1000)
simple_net = relay.nn.bias_add(data=simple_net, bias=bias8)
simple_net = relay.reshape(simple_net, (batch_size, 1000))
simple_net = relay.nn.softmax(data=simple_net)

tic = timer()
node = relay.analysis.free_vars(simple_net)
simple_net = relay.Function(node, simple_net)
net, params = testing.create_workload(simple_net)

tg = "c"
with relay.build_config(opt_level=3):
    mod, _ = relay.optimize(net, tg, params)
graph0, func0, params0 = graph_runtime_codegen.GraphRuntimeCodegen(None, tg).codegen(mod["main"])
func = tvm.build(func0, tg, name="default_function")
toc = timer()
print("AlexNet compile on TVM time is:", toc - tic)
data = relay.var("data", relay.TensorType((batch_size,3,4,4), "float32")) conv1_weight = relay.var("conv1_weight") conv2_weight = relay.var("conv2_weight") dense1_weight = relay.var("dense1_weight") dense2_weight = relay.var("dense2_weight") simple_net = relay.nn.conv2d(data=data, weight=conv1_weight, kernel_size=(2,2), channels=2, strides=(2,2),padding=(1, 1)) #simple_net = relay.nn.max_pool2d(simple_net,pool_size=(2, 2),strides=(2, 2),padding=(1, 1)) #simple_net = relay.nn.batch_flatten(simple_net) #simple_net = relay.nn.dense(simple_net, dense1_weight,units=10) #simple_net = relay.nn.relu(simple_net) #simple_net = relay.nn.softmax(simple_net,1) node = relay.analysis.free_vars(simple_net) print("**************test1*************") simple_net = relay.Function(node, simple_net) print("**************test2*************") net, params_tmp = testing.create_workload(simple_net) target="cuda" print("**************test3*************") mod, _ = relay.optimize(net, target, params_tmp) print("**************test4*************") grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target) print("**************test5*************") graph0, func0, params0 = grc.codegen(mod["main"]) print(func0) print("**************test6*************") func=tvm.build(func0, target, name="default_function") print("**************test7*************") print(func.get_source())