def test_save_load():
    """Round-trip a two-entry parameter dict through save/load and compare."""
    expected = {
        "x": np.ones((10, 2)).astype("float32"),
        "y": np.ones((1, 2, 3)).astype("float32"),
    }

    serialized = relay.save_param_dict(expected)
    assert isinstance(serialized, bytearray)

    restored = relay.load_param_dict(serialized)
    assert len(restored) == 2
    # Every entry must come back with exactly the values that went in.
    for key in ("x", "y"):
        np.testing.assert_equal(restored[key].asnumpy(), expected[key])
def test_ndarray_reflection():
    """Serialize a dict whose two keys share one TVM array and check the data."""
    # Bind the very same TVM array under two different keys.
    source = tvm.nd.array(np.random.uniform(size=(10, 2)).astype("float32"))
    shared = dict.fromkeys(('x', 'y'), source)
    assert shared['x'].same_as(shared['y'])

    # Round-trip the dict through its serialized form.
    restored = relay.load_param_dict(relay.save_param_dict(shared))
    x_data = restored['x'].asnumpy()

    # `x` must still hold the original contents ...
    np.testing.assert_equal(x_data, source.asnumpy())
    # ... and `y` must hold the same contents as `x`.
    np.testing.assert_equal(x_data, restored['y'].asnumpy())
def verify_graph_runtime(remote, target, shape, dtype):
    """Build ``x + 1`` with ``x`` supplied as a parameter and run it on ``remote``.

    The compiled object file is uploaded to the remote session, loaded there,
    executed on the remote CPU, and the output is compared against
    ``ones(shape) + 1``.
    """
    x_in = np.ones(shape).astype(dtype)

    x = relay.var('x')
    func = relay.Function([x], relay.add(x, relay.const(1)))
    graph, lib, params = relay.build(func, target=target, params={'x': x_in})

    # Keep the tempdir handle in a local for the rest of the function.
    temp = util.tempdir()
    lib_name = "dev_lib.o"
    local_path = temp.relpath(lib_name)
    lib.save(local_path)
    remote.upload(local_path)
    remote_lib = remote.load_module(lib_name)

    ctx = remote.cpu(0)
    executor = graph_runtime.create(graph, remote_lib, ctx)
    executor.load_params(relay.save_param_dict(params))
    executor.run()

    result = executor.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
    tvm.testing.assert_allclose(x_in + 1, result.asnumpy())
def test_build():
    """Build ``relu(dense(a, b)) + c`` through ``BuildModule`` and verify it on CPU.

    ``b`` and ``c`` are passed as build parameters; ``a`` is fed as a runtime
    input. The executor output is compared against a NumPy reference.
    """
    m_bld = BuildModule()
    tgt = "llvm"
    ctx = tvm.cpu()

    # func
    a = relay.var("a", dtype="float32", shape=(16, 8))
    b = relay.var("b", dtype="float32", shape=(8, 8))
    c = relay.var("c", dtype="float32", shape=(16, 8))
    x = relay.nn.dense(a, b)
    y = relay.nn.relu(x)
    z = y + c
    func = relay.Function([a, b, c], z)

    A = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), ctx=ctx)
    B = tvm.nd.array(np.random.uniform(-1, 1, (8, 8)).astype("float32"), ctx=ctx)
    C = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), ctx=ctx)
    params = {
        "b": B,
        "c": C,
    }

    # build
    targets = {tgt: tgt}
    m_bld.set_opt_level(3)
    m_bld.build(func, targets, "llvm", params=params)
    g_json = m_bld.get_json()
    mmod = m_bld.get_module()
    # From here on `params` refers to the parameters returned by the build.
    params = m_bld.get_params()

    # test
    rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
    rt.set_input("a", A)
    rt.load_params(relay.save_param_dict(params))
    rt.run()
    out = rt.get_output(0)

    np.testing.assert_allclose(
        out.asnumpy(),
        np.maximum(np.dot(A.asnumpy(), B.asnumpy().T), 0) + C.asnumpy(),
        atol=1e-5,
        rtol=1e-5,
    )
def build_module(opts):
    """Compile pretrained MobileNet 0.25 with a softmax head and save the artifacts.

    Writes ``model.o``, ``graph.json`` and ``params.bin`` into ``opts.out_dir``,
    creating that directory when needed.

    Parameters
    ----------
    opts : object
        Must expose ``out_dir``, the directory that receives the artifacts.
    """
    dshape = (1, 3, 224, 224)
    from mxnet.gluon.model_zoo.vision import get_model

    block = get_model('mobilenet0.25', pretrained=True)
    shape_dict = {'data': dshape}
    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    func = mod["main"]
    # Wrap the network body in a softmax so the output is a probability vector.
    func = relay.Function(func.params, relay.nn.softmax(func.body), None,
                          func.type_params, func.attrs)

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, 'llvm --system-lib', params=params)

    build_dir = os.path.abspath(opts.out_dir)
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(build_dir, exist_ok=True)

    lib.save(os.path.join(build_dir, 'model.o'))
    with open(os.path.join(build_dir, 'graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(os.path.join(build_dir, 'params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
def build_module(opts):
    """Compile pretrained MobileNet 0.25 once per entry in ``RUNTIMES``.

    For every ``(runtime_name, file_format_str)`` pair the model object file,
    graph JSON and serialized parameters are written into ``opts.out_dir``,
    with file names produced by ``file_format_str.format(name=..., ext=...)``.

    Parameters
    ----------
    opts : object
        Must expose ``out_dir``, the directory that receives the artifacts.
    """
    dshape = (1, 3, 224, 224)
    from mxnet.gluon.model_zoo.vision import get_model

    block = get_model("mobilenet0.25", pretrained=True)
    shape_dict = {"data": dshape}
    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    func = mod["main"]
    # Wrap the network body in a softmax so the output is a probability vector.
    func = relay.Function(func.params, relay.nn.softmax(func.body), None,
                          func.type_params, func.attrs)

    # The output directory does not depend on the runtime; prepare it once.
    build_dir = os.path.abspath(opts.out_dir)
    os.makedirs(build_dir, exist_ok=True)

    for runtime_name, file_format_str in RUNTIMES.items():
        with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
            # Bind the build output to a separate name: rebinding `params`
            # here would feed the parameters returned by the previous build
            # into the next iteration instead of the frontend weights.
            graph, lib, built_params = relay.build(
                func, f"llvm --runtime={runtime_name} --system-lib", params=params)

        lib.save(os.path.join(build_dir, file_format_str.format(name="model", ext="o")))

        graph_path = os.path.join(build_dir, file_format_str.format(name="graph", ext="json"))
        with open(graph_path, "w") as f_graph_json:
            f_graph_json.write(graph)

        params_path = os.path.join(build_dir, file_format_str.format(name="params", ext="bin"))
        with open(params_path, "wb") as f_params:
            f_params.write(relay.save_param_dict(built_params))
def main():
    """Build ResNet-18 as a system-lib module and save it under ``sys.argv[1]``.

    Writes ``model.o``, ``graph.json`` and ``params.bin`` into the directory
    named by the first command-line argument, creating it when needed.

    Raises
    ------
    SystemExit
        If no output directory was given on the command line.
    """
    if len(sys.argv) < 2:
        raise SystemExit("usage: {} <build_dir>".format(sys.argv[0]))

    # The MLP workload that used to be built here was overwritten before use,
    # so only the ResNet workload is constructed.
    dshape = (1, 3, 224, 224)
    net, params = relay.testing.resnet.get_workload(
        num_layers=18, batch_size=dshape[0], image_shape=dshape[1:])

    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(net, "llvm --system-lib", params=params)

    build_dir = osp.abspath(sys.argv[1])
    os.makedirs(build_dir, exist_ok=True)

    lib.save(osp.join(build_dir, "model.o"))
    with open(osp.join(build_dir, "graph.json"), "w") as f_graph_json:
        f_graph_json.write(graph)
    with open(osp.join(build_dir, "params.bin"), "wb") as f_params:
        f_params.write(relay.save_param_dict(params))
def convert_tvm(config, torch_model, x):
    """Convert the ONNX model at ``opt.onnx_path`` to Relay and save it.

    The Relay module is written as JSON to ``<opt.tvm_dir>/model.json`` and the
    parameters to ``<opt.tvm_dir>/model.params``. ``torch_model`` is not used
    by this function; ``x[0]`` only supplies the batch size and sequence length.
    """
    opt = config['opt']

    import onnx
    import tvm
    from tvm import relay

    onnx_model = onnx.load(opt.onnx_path)
    logger.info("[ONNX model loaded]")

    # All three inputs share the (batch, sequence) shape of the first tensor.
    first_input = x[0]
    io_shape = (first_input.shape[0], first_input.shape[1])
    shape_dict = {
        name: io_shape
        for name in ('input_ids', 'input_mask', 'segment_ids')
    }

    model, params = relay.frontend.from_onnx(onnx_model, shape_dict, opset=opt.onnx_opset)
    logger.info("[Converting to TVM done]")

    json_path = os.path.join(opt.tvm_dir, 'model.json')
    with open(json_path, 'w') as json_out:
        json_out.write(tvm.ir.save_json(model))

    params_path = os.path.join(opt.tvm_dir, 'model.params')
    with open(params_path, 'wb') as params_out:
        params_out.write(relay.save_param_dict(params))
def build(target_dir):
    """ Compiles resnet18 with TVM and stores the artifacts in ``target_dir``.

    Does nothing when ``deploy_lib.o`` already exists in ``target_dir``.
    """
    lib_obj_path = osp.join(target_dir, 'deploy_lib.o')
    if osp.exists(lib_obj_path):
        return

    if not args.pretrained:
        # use random weights from relay.testing
        net, params = relay.testing.resnet.get_workload(
            num_layers=18, batch_size=batch_size, image_shape=image_shape)
    else:
        # needs mxnet installed
        from mxnet.gluon.model_zoo.vision import get_model

        # with `--pretrained`, a resnet18 trained on the imagenet1k dataset
        # for image classification is downloaded
        block = get_model('resnet18_v1', pretrained=True)
        net, params = relay.frontend.from_mxnet(block, {"data": data_shape})
        # we want a probability so add a softmax operator
        net = relay.Function(net.params, relay.nn.softmax(net.body),
                             None, net.type_params, net.attrs)

    # compile the model
    with tvm.transform.PassContext(opt_level=opt_level):
        graph, lib, params = relay.build_module.build(net, target, params=params)

    # save the model artifacts
    lib.save(lib_obj_path)
    shared_lib_path = osp.join(target_dir, "deploy_lib.so")
    cc.create_shared(shared_lib_path, [osp.join(target_dir, "deploy_lib.o")])

    graph_path = osp.join(target_dir, "deploy_graph.json")
    with open(graph_path, "w") as graph_out:
        graph_out.write(graph)

    params_path = osp.join(target_dir, "deploy_param.params")
    with open(params_path, "wb") as params_out:
        params_out.write(relay.save_param_dict(params))
def main():
    """Build ResNet-18 as a system-lib module and save it under ``--out-dir``.

    Writes ``model.bc``, ``graph.json`` and ``params.bin`` into the output
    directory (default: the current directory), creating it when needed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--out-dir', default='.')
    opts = parser.parse_args()

    dshape = (1, 3, 224, 224)
    # `num_layers` is the keyword used by the other ResNet build routine in
    # this file; the former `layers=18` keyword did not select the depth.
    net, params = relay.testing.resnet.get_workload(
        num_layers=18, batch_size=dshape[0], image_shape=dshape[1:])

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net, 'llvm --system-lib', params=params)

    build_dir = osp.abspath(opts.out_dir)
    # exist_ok=True already covers the "directory exists" case.
    os.makedirs(build_dir, exist_ok=True)

    lib.save(osp.join(build_dir, 'model.bc'))
    with open(osp.join(build_dir, 'graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(osp.join(build_dir, 'params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
def test_fp16_build():
    """Build and run a float16 element-wise add on CUDA, if the GPU allows it.

    The test returns early (after printing a message) when CUDA is not
    enabled, no GPU is present, or the GPU lacks fp16 support.
    """
    dtype = "float16"

    cuda_ready = tvm.runtime.enabled("cuda") and tvm.gpu(0).exist
    if not cuda_ready:
        print("skip because cuda is not enabled.")
        return

    ctx = tvm.gpu(0)
    fp16_ok = have_fp16(ctx.compute_version) if dtype == "float16" else True
    if not fp16_ok:
        print("skip because gpu does not support fp16")
        return

    x = relay.var("x", dtype=dtype, shape=(4, 4))
    y = relay.var("y", dtype=dtype, shape=(4, 4))
    func = relay.Function([x, y], x + y)

    def random_input():
        # 4x4 tensor drawn uniformly from [-1, 1), placed on the GPU
        return tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), ctx=ctx)

    X = random_input()
    Y = random_input()

    # build
    g_json, mmod, built_params = relay.build(func, "cuda", params={"x": X, "y": Y})

    # test
    rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
    rt.load_params(relay.save_param_dict(built_params))
    rt.run()
    result = rt.get_output(0).asnumpy()

    np.testing.assert_allclose(result, X.asnumpy() + Y.asnumpy(), atol=1e-5, rtol=1e-5)
def export_library(self, directory_path):
    """Export the pipeline executor into disk files.

    For every module in ``self.pipeline_mods`` the compiled library, graph
    JSON and serialized parameters are written into ``directory_path``. Three
    JSON files describe the export: ``load_config`` (per-module file paths
    and device), ``pipeline_config`` (``self.mods_config``) and ``config``
    (the paths of the other two).

    Parameters
    ----------
    directory_path : str
        Export the files to this directory. It must already exist.

    Returns
    -------
    config_file_name : str
        Path of the top-level ``config`` file.

    Raises
    ------
    RuntimeError
        If the executor holds no pipeline modules, or ``directory_path``
        does not exist.
    """
    if not self.pipeline_mods:
        raise RuntimeError("The pipeline executor has not been initialized.")

    # Check if the directory_path exists.
    if not os.path.exists(directory_path):
        # The placeholder has to be filled in explicitly; the previous plain
        # string reported the literal text "{directory_path}".
        raise RuntimeError(
            "The directory {} does not exist.".format(directory_path))

    # Create a load configuration.
    load_config_file_name = "{}/load_config".format(directory_path)
    pipeline_config_file_name = "{}/pipeline_config".format(directory_path)
    config = {
        "load_config": load_config_file_name,
        "pipeline_config": pipeline_config_file_name,
    }
    load_config = []

    # Export the library, JSON, and parameters into files, then record the
    # paths of these files in a configuration file.
    for lib_index in self.pipeline_mods:
        mod_info = self.pipeline_mods[lib_index]
        mconfig = {
            "mod_idx": lib_index,
            "lib_name": "{}/lib{}.so".format(directory_path, lib_index),
            "json_name": "{}/json{}".format(directory_path, lib_index),
            "params_name": "{}/params{}".format(directory_path, lib_index),
            "dev": "{},{}".format(
                mod_info["dev"].device_type,
                mod_info["dev"].device_id,
            ),
        }

        # Get the graph, lib, and parameters from GraphExecutorFactoryModule.
        lib = mod_info["lib"]

        # Export the lib, graph, and parameters to disk.
        lib.export_library(mconfig["lib_name"])
        with open(mconfig["json_name"], "w") as file_handle:
            file_handle.write(lib.graph_json)
        with open(mconfig["params_name"], "wb") as file_handle:
            file_handle.write(relay.save_param_dict(lib.params))

        load_config.append(mconfig)

    with open(load_config_file_name, "w") as file_handle:
        json.dump(load_config, file_handle)

    with open(pipeline_config_file_name, "w") as file_handle:
        json.dump(self.mods_config, file_handle)

    config_file_name = "{}/config".format(directory_path)
    with open(config_file_name, "w") as file_handle:
        json.dump(config, file_handle)

    return config_file_name
def test_simple_network(self):
    """Save a small Relay network to disk, reload it and check the XGraph layers.

    The network is pad -> conv2d -> batch_norm -> relu -> mean -> squeeze ->
    dense -> bias_add -> softmax -> reshape. The module JSON and parameters
    are written under ``FILE_DIR``, loaded back through
    ``load_model_from_file`` and converted with ``xf_relay.from_relay``; the
    first entry of each resulting layer's ``type`` is then checked.
    """
    data = relay.var("data", relay.TensorType((-1, 3, 224, 224), "float32"))
    weight = relay.var("weight")
    bn_gamma = relay.var("bn_gamma")
    bn_beta = relay.var("bn_beta")
    bn_mmean = relay.var("bn_mean")
    bn_mvar = relay.var("bn_var")

    simple_net = relay.nn.pad(data, ((0, 0), (0, 0), (1, 1), (1, 1)))
    simple_net = relay.nn.conv2d(data=simple_net, weight=weight, kernel_size=(3, 3),
                                 channels=16, padding=(0, 0))
    simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0]
    simple_net = relay.nn.relu(simple_net)
    simple_net = relay.op.reduce.mean(simple_net, axis=(2, 3))
    simple_net = relay.op.transform.squeeze(simple_net)

    dense_weight = relay.var("dense_weight")
    dense_bias = relay.var('dense_bias')
    simple_net = relay.nn.dense(simple_net, weight=dense_weight, units=10)
    simple_net = relay.nn.bias_add(simple_net, dense_bias, axis=1)
    simple_net = relay.nn.softmax(simple_net, axis=1)
    simple_net = relay.op.transform.reshape(simple_net, newshape=(-1, 10))

    simple_net = relay.Function(relay.analysis.free_vars(simple_net), simple_net)
    mod, params = testing.create_workload(simple_net)

    json_file = os.path.join(FILE_DIR, "relay_mod_test.json")
    params_file = os.path.join(FILE_DIR, "relay_params_test.params")

    expected_types = [
        'Input', 'Pad', 'Convolution', 'BatchNorm', 'ReLU', 'Mean',
        'Squeeze', 'Dense', 'BiasAdd', 'Softmax', 'Reshape',
    ]

    try:
        with open(json_file, 'w') as f:
            json.dump(tvm.ir.save_json(mod), f)
        with open(params_file, "wb") as fo:
            fo.write(relay.save_param_dict(params))

        mod_read, params_read = load_model_from_file('Relay', 'Relay')(
            model_path=json_file,
            shapes={'data': [-1, 3, 224, 224]},
            opt_model_path=params_file)

        xgraph = xf_relay.from_relay(mod_read, params_read)
        layers = xgraph.get_layers()

        for idx, expected in enumerate(expected_types):
            assert layers[idx].type[0] == expected
    finally:
        # Remove the temporary files even when an assertion above fails, so
        # a failing run does not leave artifacts in FILE_DIR.
        for leftover in (json_file, params_file):
            if os.path.exists(leftover):
                os.remove(leftover)
# -----------------------------
# We can also save the graph, lib and parameters into files and load them
# back in deploy environment.

####################################################

# save the graph, lib and params into separate files
from tvm.contrib import util

temp = util.tempdir()
path_lib = temp.relpath("deploy_lib.tar")
lib.export_library(path_lib)
with open(temp.relpath("deploy_graph.json"), "w") as fo:
    fo.write(graph)
with open(temp.relpath("deploy_param.params"), "wb") as fo:
    fo.write(relay.save_param_dict(params))
print(temp.listdir())

####################################################

# load the module back.
# Read through context managers so both file handles are closed promptly.
with open(temp.relpath("deploy_graph.json")) as fi:
    loaded_json = fi.read()
loaded_lib = tvm.module.load(path_lib)
with open(temp.relpath("deploy_param.params"), "rb") as fi:
    loaded_params = bytearray(fi.read())
input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))

module = graph_runtime.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
module.run(data=input_data)
out_deploy = module.get_output(0).asnumpy()
def compile_graph(mod, params):
    """Build ``mod`` for CUDA and return ``(graph, lib, serialized_params)``.

    The build runs inside a pass context that forwards the module-level
    ``config`` object under the ``relay.ext.tensorrt.options`` key. The
    returned parameters are already serialized with ``relay.save_param_dict``.
    """
    pass_ctx = tvm.transform.PassContext(
        opt_level=3, config={"relay.ext.tensorrt.options": config})
    with pass_ctx:
        graph, lib, built_params = relay.build(mod, params=params, target="cuda")
        serialized = relay.save_param_dict(built_params)
    return graph, lib, serialized
def tvm_compile(func, params, arch, dlr_model_name):
    """Cross-compile ``func`` for an Android ABI and save the model files.

    Supported ``arch`` values are ``x86_64``, ``x86``, ``arm64-v8a`` and
    ``armeabi-v7a``; any other value prints the list of valid names and
    returns without building. ``TVM_NDK_CC`` is set in the environment for
    the chosen ABI. Output goes to ``<arch>/<dlr_model_name>/`` as
    ``model.so``, ``model.json`` and ``model.params``.
    """
    sysroot = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/sysroot"

    # (arch, TVM target, gcc toolchain, NDK compiler) for every supported ABI.
    # More info on the armv7 hard/soft ABI for Android:
    # https://android.googlesource.com/platform/ndk/+/master/docs/HardFloatAbi.md
    profiles = (
        ('x86_64',
         "llvm -model=N3350 -target=x86_64-linux-android -mattr=+ssse3,+sse4.2",
         "/opt/android-ndk/toolchains/x86_64-4.9/prebuilt/linux-x86_64",
         "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/x86_64-linux-android28-clang++"),
        ('x86',
         "llvm -model=x5-Z8350 -target=i686-linux-android -mattr=+ssse3",
         "/opt/android-ndk/toolchains/x86-4.9/prebuilt/linux-x86_64",
         "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/i686-linux-android21-clang++"),
        ('arm64-v8a',
         "llvm -device=arm_cpu -model=SM8150 -target=aarch64-linux-android",
         "/opt/android-ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64",
         "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang++"),
        ('armeabi-v7a',
         "llvm -device=arm_cpu -model=MSM8940 -target=armv7a-linux-androideabi -mfloat-abi=soft -mattr=+neon,+thumb-mode",
         "/opt/android-ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64",
         "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/armv7a-linux-androideabi21-clang++"),
    )

    selected = next((entry for entry in profiles if arch == entry[0]), None)
    if selected is None:
        print("Valid arch: arm64-v8a, armeabi-v7a, x86_64, x86")
        return

    _, target, toolchain, ndk_cc = selected
    os.environ['TVM_NDK_CC'] = ndk_cc

    print('target:', target)
    print("Compiling...")
    with relay.build_config(opt_level=3):
        graph, lib, built_params = relay.build(func, target, params=params)
    print("Compilation done")
    print("lib type_key: ", lib.type_key)

    print("Saving files")
    out_folder = arch + "/" + dlr_model_name + "/"
    os.makedirs(out_folder, exist_ok=True)

    # save the graph, lib and params into separate files
    link_options = [
        "-shared",
        "-fPIC",
        "--sysroot",
        sysroot,
        "--gcc-toolchain=" + toolchain,
        "-static-libstdc++",
    ]
    lib.export_library(out_folder + "model.so", ndk.create_shared, options=link_options)
    print("export_library done")

    with open(out_folder + "model.json", "w") as graph_out:
        graph_out.write(graph)
    with open(out_folder + "model.params", "wb") as params_out:
        params_out.write(relay.save_param_dict(built_params))
    print("Files saved to", out_folder)
def tune_and_evaluate(tuning_opt):
    """Tune the conv2d tasks of the network, build it, and time it remotely.

    ``tuning_opt`` is forwarded as keyword arguments to ``tune_tasks``. The
    network, targets, log file, export prefix and tracker settings are read
    from module-level names.
    """
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(network, batch_size=1)
    conv_tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host=target_host,
        params=params,
        ops=(relay.op.nn.conv2d, ),
    )

    # run tuning tasks
    print("Tuning...")
    tune_tasks(conv_tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params, target_host=target_host)

        # export library
        tmp = tempdir()
        if not use_android:
            filename = "{}.tar".format(module_export_prefix)
            lib.export_library(tmp.relpath(filename))
            # NOTE(review): the flattened source does not show whether the
            # extra exports below belong to this branch only or run for both
            # branches; they are kept in the non-Android branch here - confirm.
            lib.imported_modules[0].save("{}-cuda.ptx".format(module_export_prefix))
            lib.export_library("{}-lib.tar".format(module_export_prefix))
            with open("{}-graph.json".format(module_export_prefix), "w") as graph_out:
                graph_out.write(graph)
            with open("{}-params.params".format(module_export_prefix), "wb") as params_out:
                params_out.write(relay.save_param_dict(params))
        else:
            from tvm.contrib import ndk

            filename = "{}.so".format(module_export_prefix)
            lib.export_library(tmp.relpath(filename), ndk.create_shared)

        # upload module to device
        print("Upload...")
        remote = autotvm.measure.request_remote(
            device_key, tracker_host, tracker_port, timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)

        # upload parameters to device
        ctx = remote.context(str(target), 0)
        module = runtime.create(graph, rlib, ctx)
        random_input = np.random.uniform(size=input_shape).astype(dtype)
        data_tvm = tvm.nd.array(random_input)
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=30)
        # convert to millisecond
        prof_res = np.array(ftimer().results) * 1000
        mean_ms = np.mean(prof_res)
        std_ms = np.std(prof_res)
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (mean_ms, std_ms))
def build_keyword_model(opts):
    """Compile the keyword-spotting module and write the deployment artifacts.

    Steps:

    1. Load the module named by ``opts.module`` and, if ``opts.params`` is
       set, the serialized parameters from that file.
    2. Build for ``TARGET``, applying the tuning history in ``opts.tuned``
       when given, and save the object file, graph (binary and JSON) and
       parameters into ``build_dir``.
    3. Build again for the local ``llvm --system-lib`` target, run one
       sample audio file through it and save the input and the prediction.
    4. Call ``generate_id()``.

    Parameters
    ----------
    opts : object
        Must expose ``module``, ``params`` and ``tuned``.
    """
    from model.kws.kws import get_module, prepare_input

    model_input_name = 'Mfcc'

    mod = get_module(opts.module)
    print(mod)

    params_data = None
    if opts.params:
        with open(opts.params, 'rb') as f_param:
            params_data = relay.load_param_dict(f_param.read())

    def _build_for_target():
        # Single definition of the device build shared by the tuned and
        # untuned paths.
        with relay.build_config(opt_level=3):
            return relay.build_module.build(mod, target=TARGET, params=params_data)

    print("Compile...")
    if opts.tuned:
        history_file = opts.tuned
        print(f'INFO: Model tuning for with file {history_file}!')
        with autotvm.apply_history_best(history_file):
            graph, lib, out_params = _build_for_target()
    else:
        print("INFO: No Tuning!")
        graph, lib, out_params = _build_for_target()

    #save model, graph, params
    model_name = 'keyword'
    lib.save(os.path.join(build_dir, f'{model_name}_model.o'))
    print(f'INFO: {model_name}_model.o saved!')
    with open(os.path.join(build_dir, f'{model_name}_graph.bin'), 'wb') as f_graph:
        f_graph.write(bytes(graph, 'utf-8'))
    print(f'INFO: {model_name}_graph.bin saved!')
    with open(os.path.join(build_dir, f'{model_name}_graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    print(f'INFO: {model_name}_graph.json saved!')
    with open(os.path.join(build_dir, f'{model_name}_params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(out_params))
    print(f'INFO: {model_name}_params.bin saved!')

    #create input and result
    local_target = 'llvm --system-lib'
    with relay.build_config(opt_level=3):
        graph_test, lib_test, params_test = relay.build_module.build(
            mod, target=local_target)

    # Note: this logs the device-target graph, not the local test graph.
    with open('build/graph.log', 'w') as f:
        f.write(str(graph))

    sample_file = 'python/model/kws/samples/silence.wav'
    input_data = prepare_input(sample_file)

    ctx = tvm.context(local_target, 0)
    m = tvm.contrib.graph_runtime.create(graph_test, lib_test, ctx)
    m.set_input(model_input_name, input_data)
    m.set_input(**params_test)
    m.run()
    predictions = m.get_output(0, tvm.nd.empty(((1, 12)), 'float32')).asnumpy()
    predictions = predictions[0]
    print(f'INFO: sample audio file used: {sample_file}')

    # save data and output
    with open(os.path.join(build_dir, f'{model_name}_data.bin'), "wb") as fp:
        fp.write(input_data.astype(np.float32).tobytes())
    print(f'INFO: {model_name}_data.bin saved!')
    with open(os.path.join(build_dir, f'{model_name}_output.bin'), "wb") as fp:
        fp.write(predictions.astype(np.float32).tobytes())
    print(f'INFO: {model_name}_output.bin saved!')

    generate_id()
# Pick the code-generation target and matching device from the CLI flag.
target, ctx = ("cuda", tvm.gpu(0)) if args.use_gpu else ("llvm", tvm.cpu())
target_host = "llvm"

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, target_host=target_host, params=params)

# The build result unpacks into graph JSON, library and parameters.
export_graph, export_lib, export_params = lib
export_lib.export_library('compiled.so')
with open('compiled.json', 'w') as f:
    f.write(export_graph)
with open('compiled.params', 'wb') as f:
    f.write(relay.save_param_dict(export_params))
print('export complete')

if args.verbose:
    print('Running sanity check in tvm runtime')
    from tvm.contrib import graph_runtime

    dtype = "float32"
    m = graph_runtime.GraphModule(lib["default"](ctx))
    # Set inputs
    input_array = images.cpu().detach().numpy().astype(dtype)
    m.set_input(input_name, tvm.nd.array(input_array))
    tvm_result = m.run()
def compile_via_tvm(sym, arg_params, aux_params, symbol_file, data_shape, tune):
    """Convert an MXNet symbol to Relay, optionally tune it, build and export it.

    The model is imported with three inputs, ``data0`` and ``data1`` of shape
    ``(1, 128)`` and ``data2`` of shape ``(1,)``. Tuning logs go to
    ``<cwd>/tuned_logs_c5`` and the compiled library, graph and parameters
    to ``<cwd>/compiled_models``; file names derive from the base name of
    ``symbol_file`` without its ``.json`` suffix. The build target is read
    from the module-level ``target``.

    Parameters
    ----------
    sym, arg_params, aux_params
        Passed straight to ``relay.frontend.from_mxnet``.
    symbol_file : str
        Path of the symbol file; only used to derive output names.
    data_shape : sequence of int
        Only used to form the ``input_shape`` handed to the tuner.
    tune : bool
        Run ``tune_and_evaluate`` before building when true.
    """
    input_shape = [1] + list(data_shape)
    input_name = 'data'

    batch = 1
    seq_length = 128
    # The former `{'data': input_shape}` dict was overwritten before use.
    input_dict = {
        'data0': (batch, seq_length),
        'data1': (batch, seq_length),
        'data2': (batch,)
    }
    mod, params = relay.frontend.from_mxnet(sym, dtype={}, shape=input_dict,
                                            arg_params=arg_params,
                                            aux_params=aux_params)

    model_name = symbol_file.split('/')[-1].replace('.json','')
    log_dir = os.getcwd() + "/tuned_logs_c5"
    pathlib.Path(log_dir).mkdir(parents=True, exist_ok=True)
    log_file = log_dir + "/" + "%s.log" % model_name
    graph_opt_sch_file = log_dir + "/" + "%s_graph_opt.log" % model_name

    # Make sure both log files exist so the history can be applied even
    # when no tuning is run.
    Path(log_file).touch()
    Path(graph_opt_sch_file).touch()

    if tune:
        tuning_option = {
            'log_filename': log_file,
            'tuner': 'random',
            'early_stopping': None,
            'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(),
                runner=autotvm.LocalRunner(number=10, repeat=1,
                                           min_repeat_ms=1000),
            ),
        }
        tune_and_evaluate(tuning_option, mod, params, input_shape, log_file,
                          graph_opt_sch_file, input_name)

    # with autotvm.apply_graph_best(graph_opt_sch_file):
    with autotvm.apply_history_best(log_file):
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params)

    base_dir = os.getcwd() + "/compiled_models"
    pathlib.Path(base_dir).mkdir(parents=True, exist_ok=True)

    # Reuse the model name derived above instead of recomputing it.
    base = base_dir + '/tvm_' + model_name
    path_lib = base + '_deploy_lib.tar'
    path_graph = base + '_deploy_graph.json'
    path_params = base + '_deploy_params.params'

    lib.export_library(path_lib)
    with open(path_graph, 'w') as fo:
        fo.write(graph)
    with open(path_params, 'wb') as fo:
        fo.write(relay.save_param_dict(params))
def export_module(opts):
    """Import the TensorFlow keyword-spotting graph into Relay and save it.

    The downloaded frozen graph is stripped of its audio pre-processing
    nodes, given a new ``Mfcc`` placeholder, converted with
    ``relay.frontend.from_tensorflow``, optionally quantized, and then
    pickled into ``opts.out_dir`` together with a text dump.

    NOTE(review): the nesting of several statements (the ``with`` blocks at
    the top and the statements following the non-quantized pickle dump) is
    not recoverable from the flattened source; the layout below is the most
    plausible reading and should be confirmed against the original file.

    Parameters
    ----------
    opts : object
        Reads ``out_dir``, ``quantize`` and ``global_scale``.

    Returns
    -------
    (mod, params)
        The (possibly quantized) Relay module and the parameters returned by
        the TensorFlow frontend.

    Raises
    ------
    RuntimeError
        If ``opts.quantize`` is set but ``opts.global_scale`` is falsy.
    """
    # Target settings
    layout = "NCHW"

    # Download required files
    from tvm.contrib.download import download_testdata
    model_path = download_testdata(model_url, model_file_name, module=['tf', 'keyword_spotting'])
    # label_path is not used further in this function.
    label_path = download_testdata(label_url, label_name, module=['data'])

    # Import model
    with tf_compat_v1.gfile.GFile(model_path, 'rb') as f:
        graph_def = tf_compat_v1.GraphDef()
        graph_def.ParseFromString(f.read())
        # The return value `graph` is not used further in this function.
        graph = tf.import_graph_def(graph_def, name='')
        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
        with tf_compat_v1.Session() as sess:
            graph_def = tf_testing.AddShapesToGraphDef(sess, 'labels_softmax')

    build_dir = opts.out_dir
    if not os.path.exists(build_dir):
        os.makedirs(build_dir)

    ## save original TF graph
    if DEBUG_LOG:
        with open(os.path.join(build_dir, f'{model_name}_graph_original.log'), 'w') as orig_file:
            orig_file.write(str(graph_def))

    ## remove pre-processing nodes and fix the beginning of the graph
    nodes = []
    ## add first op: a float32 placeholder named 'Mfcc' that becomes the new input
    # input_dim0..2 are not referenced below; the same numbers appear in
    # `shape_dict` further down.
    input_dim0 = 1
    input_dim1 = 49
    input_dim2 = 10
    new_input = graph_def.node.add()
    new_input.op = 'Placeholder'
    new_input.name = 'Mfcc'
    new_input.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(
        type=dtypes.float32.as_datatype_enum))
    nodes.append(new_input)

    # Copy every node except those whose op is one of the listed names.
    # `new_input` was created through graph_def.node.add() with op
    # 'Placeholder', so if it is visited here it is skipped (and counted in
    # removed_count) rather than appended a second time.
    removed_count = 0
    for ii, node in enumerate(graph_def.node, start=0):
        if node.op == 'DecodeWav' \
                or node.op == 'AudioSpectrogram' \
                or node.op == 'Mfcc' \
                or node.op == 'Placeholder' \
                or node.op == 'wav_data':
            removed_count += 1
            pass
        else:
            nodes.append(node)
    print(f'NUM of layers removed: {removed_count}')

    new_graph = tf_compat_v1.GraphDef()
    new_graph.node.extend(nodes)

    ## log new graph
    if DEBUG_LOG:
        with open(os.path.join(build_dir, f'{model_name}_graph_new.log'), 'w') as new_graph_log:
            new_graph_log.write(str(new_graph))

    ## get mod and params with new graph
    shape_dict = {'Mfcc': (1, 49, 10)}
    mod, params = relay.frontend.from_tensorflow(new_graph, layout=layout, shape=shape_dict)
    if DEBUG_LOG:
        with open(os.path.join(build_dir, f'{model_name}_mod.log'), 'w') as mod_file:
            mod_file.write(str(mod))
        with open(os.path.join(build_dir, f'{model_name}_param.log'), 'w') as param_log:
            param_log.write(str(params))

    # quantization
    if opts.quantize:
        if not opts.global_scale:
            raise RuntimeError('Global Scale is not valid!')
        global_scale = float(opts.global_scale)
        print('INFO: Quantizing...')
        print(f'INFO: Global Scale: {global_scale}')
        with relay.quantize.qconfig(calibrate_mode='global_scale', global_scale=global_scale, skip_conv_layers=[0]):
            mod = relay.quantize.quantize(mod, params)
        if DEBUG_LOG:
            with open(os.path.join(build_dir, f'{model_name}_mod_quantized.log'), 'w') as mod_log:
                mod_log.write(str(mod))

    # save module
    if opts.quantize:
        # Quantized output carries the global scale in its file names.
        file_path = f'{build_dir}/module_gs_{global_scale}.pickle'
        with open(file_path, 'wb') as h1:
            pickle.dump(mod, h1, protocol=pickle.HIGHEST_PROTOCOL)
        print(f'INFO: {file_path} saved!')
        with open(f'{build_dir}/module_gs_{global_scale}.txt', 'w') as f:
            f.write(mod.astext())
    else:
        file_path = f'{build_dir}/module.pickle'
        with open(file_path, 'wb') as h1:
            pickle.dump(mod, h1, protocol=pickle.HIGHEST_PROTOCOL)
        print(f'INFO: {file_path} saved!')
        # NOTE(review): presumably params.bin and module.txt are written only
        # on this non-quantized path - confirm against the original layout.
        param_path = f'{build_dir}/params.bin'
        with open(param_path, 'wb') as f_params:
            f_params.write(relay.save_param_dict(params))
        print(f'INFO: {param_path} saved!')
        with open(f'{build_dir}/module.txt', 'w') as f:
            f.write(mod.astext())

    return mod, params
def export_tvm(path, block, data_shape, epoch=0, preprocess=True, layout='HWC',
               ctx=mx.cpu(), target='llvm', opt_level=3, use_autotvm=False):
    """Helper function to export a HybridBlock to TVM executable.

    Note that tvm package needs to be installed(https://tvm.ai/).

    Parameters
    ----------
    path : str
        Path to save model.
        Three files path_deploy_lib.so, path_deploy_graph.json and
        path_deploy_xxxx.params will be created, where xxxx is the 4 digits
        epoch number.
    block : mxnet.gluon.HybridBlock
        The hybridizable block. Note that normal gluon.Block is not supported.
    data_shape : tuple of int, required
        Unlike `export_block`, `data_shape` is required here for the purpose
        of optimization. If dynamic shape is required, you can use the shape
        that most fits the inference tasks, but the optimization won't
        accommodate all situations.
    epoch : int
        Epoch number of saved model.
    preprocess : mxnet.gluon.HybridBlock, default is True.
        Preprocess block prior to the network.
        By default (True), it will subtract mean [123.675, 116.28, 103.53],
        divide std [58.395, 57.12, 57.375], and convert original image
        (B, H, W, C and range [0, 255]) to tensor (B, C, H, W) as network
        input. This is the default preprocess behavior of all GluonCV
        pre-trained models. You can use custom pre-process hybrid block or
        disable by set ``preprocess=None``.
    layout : str, default is 'HWC'
        The layout for raw input data. By default is HWC. Supports 'HWC' and
        'CHW'. Note that image channel order is always RGB.
    ctx: mx.Context, default mx.cpu()
        Network context.
    target : str, default is 'llvm'
        Runtime type for code generation, can be ('llvm', 'cuda', 'opencl',
        'metal'...)
    opt_level : int, default is 3
        TVM optimization level, if supported, higher `opt_level` may generate
        more efficient runtime library, however, some operator may not
        support high level optimization, which will fallback to lower
        `opt_level`.
    use_autotvm : bool, default is False
        Use autotvm for performance tuning. Note that this can take very long
        time, since it's a search and model based tuning process.

    Returns
    -------
    None

    Raises
    ------
    ImportError
        If the TVM package cannot be imported.
    TypeError
        If ``preprocess`` is truthy, not ``True``, and not a ``HybridBlock``.

    """
    try:
        # `tvm`, `testing` and `runtime` are not referenced below; importing
        # them still makes a broken TVM install fail here with the hint.
        import tvm
        from tvm import autotvm
        from tvm import relay
        from tvm.relay import testing
        # All four tuners referenced by `tune_kernels` must be imported;
        # GATuner and GridSearchTuner were previously missing.
        from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
        import tvm.contrib.graph_runtime as runtime
    except ImportError:
        print("TVM package required, please refer https://tvm.ai/ for installation guide.")
        raise

    # add preprocess block if necessary
    if preprocess:
        # add preprocess block
        if preprocess is True:
            preprocess = _DefaultPreprocess()
        else:
            if not isinstance(preprocess, HybridBlock):
                raise TypeError(
                    "preprocess must be HybridBlock, given {}".format(type(preprocess)))
        wrapper_block = nn.HybridSequential()
        preprocess.initialize(ctx=ctx)
        wrapper_block.add(preprocess)
        wrapper_block.add(block)
    else:
        wrapper_block = block
    wrapper_block.collect_params().reset_ctx(ctx)

    # convert to relay graph
    sym, params = relay.frontend.from_mxnet(wrapper_block, shape={"data": data_shape})

    if use_autotvm:
        def tune_kernels(tasks,
                         measure_option,
                         tuner='gridsearch',
                         early_stopping=None,
                         log_filename='tuning.log'):
            """Tune every extracted task and append the results to ``log_filename``."""
            for i, tsk in enumerate(tasks):
                prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

                # converting conv2d tasks to conv2d_NCHWc tasks
                op_name = tsk.workload[0]
                if op_name == 'conv2d':
                    func_create = 'topi_x86_conv2d_NCHWc'
                elif op_name == 'depthwise_conv2d_nchw':
                    func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
                else:
                    raise ValueError("Tuning {} is not supported on x86".format(op_name))

                task = autotvm.task.create(func_create, args=tsk.args,
                                           target=target, template_key='direct')
                task.workload = tsk.workload

                # create tuner
                if tuner in ('xgb', 'xgb-rank'):
                    tuner_obj = XGBTuner(task, loss_type='rank')
                elif tuner == 'ga':
                    tuner_obj = GATuner(task, pop_size=50)
                elif tuner == 'random':
                    tuner_obj = RandomTuner(task)
                elif tuner == 'gridsearch':
                    tuner_obj = GridSearchTuner(task)
                else:
                    raise ValueError("Invalid tuner: " + tuner)

                # do tuning
                n_trial = len(task.config_space)
                tuner_obj.tune(n_trial=n_trial,
                               early_stopping=early_stopping,
                               measure_option=measure_option,
                               callbacks=[
                                   autotvm.callback.progress_bar(n_trial, prefix=prefix),
                                   autotvm.callback.log_to_file(log_filename)])

        tasks = autotvm.task.extract_from_program(sym, target=target,
                                                  params=params, ops=(relay.op.nn.conv2d,))
        logging.warning('Start tunning, this can be slow...')
        tuning_option = {
            'log_filename': 'tune.log',
            'tuner': 'random',
            'early_stopping': None,
            'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(),
                runner=autotvm.LocalRunner(number=10, repeat=1,
                                           min_repeat_ms=1000),
            ),
        }
        tune_kernels(tasks, **tuning_option)

        # Apply the history from the log file the tuner just wrote; the
        # previously referenced `log_file` name was never defined here.
        with autotvm.apply_history_best(tuning_option['log_filename']):
            with relay.build_config(opt_level=opt_level):
                graph, lib, params = relay.build_module.build(
                    sym, target=target, params=params)
    else:
        with relay.build_config(opt_level=opt_level):
            graph, lib, params = relay.build_module.build(sym, target, params=params)

    # Look the serializer up before writing, so that an AttributeError raised
    # while serializing or writing is not mistaken for a missing attribute.
    try:
        save_param_dict = relay.compiler.save_param_dict
    except AttributeError:
        save_param_dict = relay.save_param_dict

    # export library, json graph and parameters
    lib.export_library(path + '_deploy_lib.so')
    with open(path + '_deploy_graph.json', 'w') as fo:
        fo.write(graph)
    with open(path + '_deploy_{:04n}.params'.format(epoch), 'wb') as fo:
        fo.write(save_param_dict(params))
def compile(info):
    """Compile an ONNX or TensorFlow model with Relay and write the artifacts.

    Parameters
    ----------
    info : dict
        Compilation settings. Keys read here:

        - ``model_path``: path ending in ``.onnx`` or ``.pb``.
        - ``input_dict``: passed to ``relay.frontend.from_onnx`` for ONNX
          models; for TensorFlow models its ``(name, shape)`` items are used
          to create placeholders (``-1`` dimensions are replaced by ``1``).
        - ``input_data_type``, ``input_list``, ``output_names``: TensorFlow
          path only.
        - ``target``: TVM target string.
        - ``cross_compile``: truthy to build for ``aarch64-linux-gnu``.
        - ``autotvm_log``: tuning log given to ``autotvm.apply_history_best``.
        - ``output_path``: directory the three output files are written to.

        ``info`` is not modified by this function.

    Raises
    ------
    Exception
        If the model file extension is not supported, or if cross-compilation
        is requested for the ``cuda`` target.
    """
    if info['model_path'].endswith('.onnx'):
        is_onnx = True
    elif info['model_path'].endswith('.pb'):
        is_onnx = False
    else:
        raise Exception('Model file format not supported')

    # Load model
    if is_onnx:
        onnx_model = onnx.load(info['model_path'])
        mod, params = relay.frontend.from_onnx(onnx_model, info['input_dict'])
        optimization_level = 3
    else:
        with tf.compat.v1.Session() as sess:
            with tf.io.gfile.GFile(info['model_path'], 'rb') as f:
                graph_def = tf.compat.v1.GraphDef()
                graph_def.ParseFromString(f.read())
                # Replace the graph's "input:<index>" tensors with placeholders
                # of fixed shape.
                input_map = {}
                for index, (name, shape) in enumerate(info['input_dict'].items()):
                    tf_new_image = tf.compat.v1.placeholder(
                        shape=[1 if x == -1 else x for x in shape],
                        dtype=info['input_data_type'],
                        name=name)
                    input_map["input:" + str(index)] = tf_new_image
                tf.import_graph_def(graph_def, name='', input_map=input_map)
                graph_def = sess.graph.as_graph_def()
                graph_def = tf_testing.ProcessGraphDefParam(graph_def)
            input_shape_dict = {'DecodeJpeg/contents': info['input_list']}
            mod, params = relay.frontend.from_tensorflow(
                graph_def, shape=input_shape_dict, outputs=info['output_names'])
        optimization_level = 2

    # Set compilation params.
    # Work on a local copy of the target: appending to info['target'] in place
    # would make a second call with the same dict append the flag twice.
    target = info['target']
    if info['cross_compile']:
        if target == 'cuda':
            raise Exception('cuda cross-compilation not supported yet')
        target += ' -target=aarch64-linux-gnu'

    # Transform data layout to what is expected by CUDA hardware, i.e. NCHW
    if target == 'cuda':
        desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
        seq = tvm.transform.Sequential([
            relay.transform.RemoveUnusedFunctions(),
            relay.transform.ConvertLayout(desired_layouts)
        ])
        with tvm.transform.PassContext(opt_level=3):
            mod = seq(mod)

    # Compile model
    # Note opt_level cannot be higher than 2 because of a bug:
    # https://discuss.tvm.ai/t/tvm-0-6-1-compile-yolo-v2-tiny-fail-worked-in-v0-5-2/7244
    # (the TensorFlow path above uses 2; the ONNX path uses 3).
    with autotvm.apply_history_best(info['autotvm_log']):
        with relay.build_config(opt_level=optimization_level):
            graph, lib, params = relay.build(mod, target=target, params=params)

    # Write the compiled model to files
    output_model_path = path.join(info['output_path'], OUTPUT_NETWORK_MODULE_FILENAME)
    output_graph_path = path.join(info['output_path'], OUTPUT_NETWORK_GRAPH_FILENAME)
    output_param_path = path.join(info['output_path'], OUTPUT_NETWORK_PARAM_FILENAME)

    print('Writing library to', output_model_path)
    if info['cross_compile']:
        lib.export_library(
            output_model_path,
            cc.build_create_shared_func(
                options=['--target=aarch64-linux-gnu', '-march=armv8-a', '-mfpu=NEON'],
                compile_cmd='/usr/bin/clang'))
    else:
        lib.export_library(output_model_path)

    print('Writing graph to', output_graph_path)
    with open(output_graph_path, 'w') as graph_file:
        graph_file.write(graph)

    print('Writing weights to', output_param_path)
    with open(output_param_path, 'wb') as param_file:
        param_file.write(relay.save_param_dict(params))
print("Tuning graph...")
# tune_graph(mod["main"], data_shape, option['log_file'], option['graph_best_file'])

print("Compile...")
# if use tune_tasks
# with autotvm.apply_history_best(option['log_best_file']):
# if use tune_graph
# with autotvm.apply_graph_best(option['graph_best_file']):
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build_module.build(mod, target=option['target'], params=params_)

# Persist the three build artifacts: library, graph JSON and serialized params.
print('Exporting library...')
lib.export_library(option['path_lib'])
with open(option['path_graph'], "w") as fo:
    fo.write(graph)
with open(option['path_params'], "wb") as fo:
    fo.write(relay.save_param_dict(params))

# Read the artifacts back from disk. Context managers are used so the file
# handles are closed deterministically instead of being left to the GC.
print('Loading library...')
loaded_lib = tvm.module.load(option['path_lib'])
with open(option['path_graph']) as fi:
    loaded_graph = fi.read()
with open(option['path_params'], 'rb') as fi:
    loaded_params = bytearray(fi.read())

print('Runing...')
ctx = tvm.context(option['target'], 0)
data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype('float32'))
m = tvm.contrib.graph_runtime.create(loaded_graph, loaded_lib, ctx)
m.load_params(loaded_params)
m.run(**{input_name: data_tvm})  # or m.set_input(input_name, data_tvm); m.run()
out = m.get_output(0)
# Index of the largest value in the first output.
print(out.asnumpy().argmax())
def build_conv2d_module(opts):
    """Build a single-conv2d Relay module and write it plus reference data to disk.

    The module is built twice: once for ``TARGET`` (object file, graph JSON and
    params are written to the output directory) and once for a local
    ``llvm --system-lib`` target, whose result is compared against
    ``conv2d_nchw_python`` and saved as the expected output.

    Parameters
    ----------
    opts : object
        Must provide ``out_dir``, the directory the files are written to. It is
        created if it does not exist.

    Raises
    ------
    AssertionError
        From ``np.testing.assert_allclose`` if the TVM result differs from the
        reference result beyond ``rtol=1e-2``.
    """
    batch = 1
    in_channel = 3
    out_channel = 16
    in_size = 8
    kernel = 3
    pad = 1
    stride = 1

    A = relay.var('A', shape=(batch, in_channel, in_size, in_size))
    W = relay.var('W', shape=(out_channel, in_channel, kernel, kernel))
    B = relay.op.nn.nn.conv2d(A, W,
                              strides=(stride, stride),
                              padding=(pad, pad),
                              kernel_size=kernel,
                              data_layout='NCHW',
                              kernel_layout='OIHW',
                              out_layout='',
                              out_dtype='')

    a_data = np.random.uniform(size=(batch, in_channel, in_size, in_size)).astype('float32')
    w_data = np.random.uniform(size=(out_channel, in_channel, kernel, kernel)).astype('float32')

    func = relay.Function([A, W], B)
    params = {"W": w_data}
    graph, lib, params = relay.build_module.build(tvm.IRModule.from_expr(func),
                                                  target=TARGET,
                                                  params=params)

    build_dir = os.path.abspath(opts.out_dir)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(build_dir, exist_ok=True)

    lib.save(os.path.join(build_dir, 'conv2d_model.o'))
    with open(os.path.join(build_dir, 'conv2d_graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(os.path.join(build_dir, 'conv2d_params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
    with open(os.path.join(build_dir, "conv2d_data.bin"), "wb") as fp:
        fp.write(a_data.astype(np.float32).tobytes())

    ## get TVM result on local machine
    # `params` was rebound by the build above, so start again from the raw weights.
    params = {"W": w_data}
    local_target = 'llvm --system-lib'
    graph, lib, params = relay.build_module.build(tvm.IRModule.from_expr(func),
                                                  target=local_target,
                                                  params=params)
    tvm_out = run_conv2d_module(a_data, graph, lib, params, target=local_target)
    b_np = conv2d_nchw_python(a_data, w_data, (stride, stride), (pad, pad))
    print("TVM Output: " + str(tvm_out.shape))
    print("Numpy Output: " + str(b_np.shape))
    np.testing.assert_allclose(b_np, tvm_out, rtol=1e-2)
    with open(os.path.join(build_dir, "conv2d_output.bin"), "wb") as fp:
        fp.write(tvm_out.astype(np.float32).tobytes())
def export_classic_format(
    self,
    executor_factory: GraphExecutorFactoryModule,
    package_path: Optional[str] = None,
    cross: Optional[Union[str, Callable]] = None,
    cross_options: Optional[str] = None,
    lib_format: str = "so",
):
    """Save this TVMCModel to file.

    Parameters
    ----------
    executor_factory : GraphExecutorFactoryModule
        The factory containing the compiled artifacts needed to run this model.
    package_path : str, None
        Where the model should be saved. Note that it will be packaged as a .tar file.
        If not provided, the package will be saved to a generically named file in tmp.
    cross : str or callable object, optional
        Function that performs the actual compilation.
    cross_options : str, optional
        Command line options to be passed to the cross compiler, separated by
        whitespace. Ignored when ``cross`` is not given.
    lib_format : str
        How to export the modules function library. Must be one of "so" or "tar".

    Returns
    -------
    package_path : str
        The path that the package was saved to.
    """
    lib_name = "mod." + lib_format
    graph_name = "mod.json"
    param_name = "mod.params"

    temp = self._tmp_dir
    if package_path is None:
        package_path = self.default_package_path()
    path_lib = temp.relpath(lib_name)

    lib = executor_factory.get_lib()
    if not cross:
        lib.export_library(path_lib)
    elif not cross_options:
        lib.export_library(path_lib, tvm.contrib.cc.cross_compiler(cross))
    else:
        # split() without a separator collapses runs of whitespace, so extra
        # or leading/trailing spaces do not become empty-string options.
        lib.export_library(
            path_lib,
            tvm.contrib.cc.cross_compiler(cross, options=cross_options.split()))
    self.lib_path = path_lib

    with open(temp.relpath(graph_name), "w") as graph_file:
        graph_file.write(executor_factory.get_graph_json())

    with open(temp.relpath(param_name), "wb") as params_file:
        params_file.write(relay.save_param_dict(executor_factory.get_params()))

    # Package up all the temp files into a tar file.
    with tarfile.open(package_path, "w") as tar:
        tar.add(path_lib, lib_name)
        tar.add(temp.relpath(graph_name), graph_name)
        tar.add(temp.relpath(param_name), param_name)

    return package_path
lib_name = "main.so" elif platform.system() == "Windows": lib_name = "main.dll" else: raise Exception("unknown system " + platform.system()) print("export_library main lib") lib.export_library(lib_name) # or save object file for deploy usage # lib.save(os.path.join(work_root, binary_dir, 'model.o')) print("load main lib") sysLib = tvm.runtime.load_module(lib_name) ctx = tvm.cpu(0) input_data = np.random.random(dshape).astype(np.float32) for fk in ret_mods: mg = ret_mods[fk].get_json() mp = ret_mods[fk].get_params() print("test " + fk + " ------------------------------------") module = graph_runtime.create(mg, sysLib, ctx) module.load_params(relay.save_param_dict(mp)) module.set_input("data", tvm.nd.array(input_data)) module.run() num_output = module.get_num_outputs() for idx in range(num_output): print(module.get_output(idx).shape)
def compile(info):
    """Compile an ONNX (``.onnx``) or TensorFlow (``.pb``) model for ``llvm``.

    Reads ``model_path``, ``cross_compile`` and ``output_path`` from ``info``,
    plus ``input_dict`` for ONNX models or ``input_list`` / ``output_names``
    for TensorFlow models. Writes the compiled library, the graph and the
    serialized parameters into ``info['output_path']``.

    Raises ``Exception`` when the model file extension is neither ``.onnx``
    nor ``.pb``.
    """
    model_file = info['model_path']
    if not model_file.endswith(('.onnx', '.pb')):
        raise Exception('Model file format not supported')

    # Load model
    if model_file.endswith('.onnx'):
        mod, params = relay.frontend.from_onnx(onnx.load(model_file), info['input_dict'])
        optimization_level = 3
    else:
        with tf.compat.v1.Session() as sess:
            with tf.io.gfile.GFile(model_file, 'rb') as model_fh:
                frozen_graph = tf.compat.v1.GraphDef()
                frozen_graph.ParseFromString(model_fh.read())
                tf.import_graph_def(frozen_graph, name='')
                frozen_graph = tf_testing.ProcessGraphDefParam(sess.graph.as_graph_def())
            mod, params = relay.frontend.from_tensorflow(
                frozen_graph,
                shape={'DecodeJpeg/contents': info['input_list']},
                outputs=info['output_names'])
        optimization_level = 2

    # Set compilation params
    target = 'llvm'
    if info['cross_compile']:
        target = target + ' -target=aarch64-linux-gnu'

    # Compile model
    # Note opt_level cannot be higher than 2 because of a bug:
    # https://discuss.tvm.ai/t/tvm-0-6-1-compile-yolo-v2-tiny-fail-worked-in-v0-5-2/7244
    with relay.build_config(opt_level=optimization_level):
        graph, lib, params = relay.build(mod, target=target, params=params)

    # Write the compiled model to files
    out_dir = info['output_path']

    output_model_path = path.join(out_dir, OUTPUT_NETWORK_MODULE_FILENAME)
    output_graph_path = path.join(out_dir, OUTPUT_NETWORK_GRAPH_FILENAME)
    output_param_path = path.join(out_dir, OUTPUT_NETWORK_PARAM_FILENAME)

    print('Writing library to', output_model_path)
    if not info['cross_compile']:
        lib.export_library(output_model_path)
    else:
        cross_build = cc.build_create_shared_func(
            options=['--target=aarch64-linux-gnu', '-march=armv8-a', '-mfpu=NEON'],
            compile_cmd='/usr/bin/clang')
        lib.export_library(output_model_path, cross_build)

    print('Writing graph to', output_graph_path)
    with open(output_graph_path, 'w') as graph_file:
        graph_file.write(graph)

    print('Writing weights to', output_param_path)
    with open(output_param_path, 'wb') as param_file:
        param_file.write(relay.save_param_dict(params))
def build_cifar(opts, model_name):
    """Build a CIFAR-10 model for ``TARGET`` and write model, data and output files.

    Parameters
    ----------
    opts : object
        Options object; this function reads ``opts.tuned`` and ``opts.quantize``.
    model_name : str
        ``'cifar-10'`` (Keras model converted with ``from_keras``) or
        ``'cifar-10-relay'`` (module returned by ``get_cifar_relay``).

    Raises
    ------
    ValueError
        If ``model_name`` is neither of the two supported names.

    Notes
    -----
    ``build_dir`` and ``TARGET`` are not defined in this function; they are
    expected to exist at module scope (TODO confirm where ``build_dir`` is set).
    """
    from tuning.model.cifar10_relay import get_cifar_relay
    from tuning.model.cifar10_arm import get_cifar_keras, gen_custom_cifar_keras

    if model_name == 'cifar-10':
        # cifar_path = 'tuning/model/saved_models/cifar10_ch8_best.h5'
        # model_input_name = 'conv2d_1_input'
        model_input_name = 'cifar10_arm_input'
        # cifar_path = 'tuning/model/saved_models/cifar10_arm_best.h5'
        shape_dict = {model_input_name: (1, 3, 32, 32)}
        # model = get_cifar_keras(cifar_path, shape_dict)
        model = gen_custom_cifar_keras(shape_dict)
        mod, params = tvm.relay.frontend.from_keras(model, shape_dict)
        print("Compile...")
        if opts.tuned:
            print("INFO: Tuned model!")
            # Build under the tuning log so the recorded best configs are applied.
            with autotvm.apply_history_best(
                    os.path.join('tuning', 'cifar_arm_footprint_min.txt')):
                if opts.quantize:
                    with relay.quantize.qconfig(calibrate_mode='global_scale',
                                                global_scale=8.0):
                        mod = relay.quantize.quantize(mod, params)
                    print('INFO: Quantized!')
                with relay.build_config(opt_level=3):
                    graph, lib, params = relay.build_module.build(
                        mod, target=TARGET, params=params)
        else:
            # Note: opts.quantize is not consulted on this untuned path.
            print("INFO: No Tuning!")
            with relay.build_config(opt_level=3):
                graph, lib, params = relay.build_module.build(mod, target=TARGET, params=params)
    elif model_name == 'cifar-10-relay':
        mod, params = get_cifar_relay()
        print('type: ', type(mod))
        print(mod.get_global_type_var)
        # shape_dict is assigned here but not read again on this model path.
        shape_dict = {'data': (1, 3, 32, 32)}
        print("Compile...")
        if opts.tuned:
            print("INFO: Tuned model!")
            with autotvm.apply_history_best(
                    os.path.join('tuning', 'cifar_relay_footprint_min.txt')):
                if opts.quantize:
                    with relay.quantize.qconfig(calibrate_mode='global_scale',
                                                global_scale=8.0):
                        mod = relay.quantize.quantize(mod, params)
                    print('INFO: Quantized!')
                with relay.build_config(opt_level=3):
                    graph, lib, params = relay.build_module.build(
                        mod, target=TARGET, params=params)
        else:
            # Note: opts.quantize is not consulted on this untuned path.
            print("INFO: No Tuning!")
            with relay.build_config(opt_level=3):
                graph, lib, params = relay.build_module.build(mod, target=TARGET, params=params)
    else:
        raise ValueError('Wrong model name!')

    # Save model, graph, params. The graph is written twice: once as UTF-8
    # bytes (.bin) and once as text (.json).
    lib.save(os.path.join(build_dir, 'cifar_model.o'))
    with open(os.path.join(build_dir, 'cifar_graph.bin'), 'wb') as f_graph:
        f_graph.write(bytes(graph, 'utf-8'))
    with open(os.path.join(build_dir, 'cifar_graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(os.path.join(build_dir, 'cifar_params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))

    # Create input and expected result.
    if model_name == 'cifar-10':
        import keras
        from keras.datasets import cifar10
        # from keras.models import load_model
        num_classes = 10
        # model = load_model(cifar_path)
        (_, _), (x_test, y_test) = cifar10.load_data()
        x_test = x_test.astype('float32')
        x_test /= 255
        y_test = keras.utils.to_categorical(y_test, num_classes)
        # Keep only the first test sample (slicing keeps the leading axis).
        test_x_sample = x_test[0:1, :, :, :]
        test_y_sample = y_test[0:1, :]
        print('x_test_sample shape:', test_x_sample.shape)
        print('y_test_sample shape:', test_y_sample.shape)
        # `scores` is not used after this call.
        scores = model.evaluate(test_x_sample, test_y_sample, verbose=1)
        keras_predict = model.predict(test_x_sample)
        print(keras_predict)

        ## get TVM result on local machine
        # Rebuild from the Keras model for a local llvm target; this rebinds
        # mod/params/graph/lib, which were already saved above.
        mod, params = relay.frontend.from_keras(model, shape_dict)
        local_target = 'llvm --system-lib'
        if opts.quantize:
            with relay.quantize.qconfig(calibrate_mode='global_scale',
                                        global_scale=8.0):
                mod = relay.quantize.quantize(mod, params)
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(mod, target=local_target, params=params)
        ctx = tvm.context(local_target, 0)
        ## create module
        module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
        # Move the last axis to position 1, matching the (1, 3, 32, 32) input
        # shape declared in shape_dict.
        tvm_sample = test_x_sample.transpose([0, 3, 1, 2])
        # print("tvm_sample shape: ", tvm_sample.shape)
        module.set_input(model_input_name, tvm_sample)
        module.set_input(**params)
        ## run
        module.run()
        # get output
        tvm_out = module.get_output(0).asnumpy()
        print("TVM Output: " + str(tvm_out.shape))
        print("Keras Output: " + str(keras_predict.shape))
        # The Keras comparison is skipped for quantized builds.
        if not opts.quantize:
            np.testing.assert_allclose(tvm_out, keras_predict, rtol=1e-2)
    elif model_name == 'cifar-10-relay':
        # No reference data for this model: single-element arrays are written.
        tvm_sample = np.array([1])
        tvm_out = np.array([1])
    else:
        # Not reachable in practice: the same check above has already raised.
        raise ValueError('Wrong model name!')

    # save data and output
    with open(os.path.join(build_dir, "cifar_data.bin"), "wb") as fp:
        fp.write(tvm_sample.astype(np.float32).tobytes())
    with open(os.path.join(build_dir, "cifar_output.bin"), "wb") as fp:
        fp.write(tvm_out.astype(np.float32).tobytes())
    # NOTE(review): assumed to be the final statement of this function; it is
    # defined elsewhere in the file.
    generate_id()
# -----------------------------
# We can also save the graph, lib and parameters into files and load them
# back in deploy environment.

####################################################

# save the graph, lib and params into separate files
from tvm.contrib import util

temp = util.tempdir()
path_lib = temp.relpath("deploy_lib.tar")
lib.export_library(path_lib)
with open(temp.relpath("deploy_graph.json"), "w") as fo:
    fo.write(graph)
with open(temp.relpath("deploy_param.params"), "wb") as fo:
    fo.write(relay.save_param_dict(params))
print(temp.listdir())

####################################################

# load the module back.
# Files are read inside ``with`` blocks so the handles are closed right away.
with open(temp.relpath("deploy_graph.json")) as fi:
    loaded_json = fi.read()
loaded_lib = tvm.runtime.load_module(path_lib)
with open(temp.relpath("deploy_param.params"), "rb") as fi:
    loaded_params = bytearray(fi.read())
input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))

module = graph_runtime.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
module.run(data=input_data)
out_deploy = module.get_output(0).asnumpy()
def quantize_model(args):
    """Build an MXNet checkpoint with Relay, optionally quantizing it first.

    Parameters
    ----------
    args : object
        Options object. Attributes read here: ``batch_size``, ``model``,
        ``target``, ``original``, ``nbit_input``, ``global_scale``,
        ``dtype_input``, ``dtype_output`` and ``simulated``.

    Returns
    -------
    tuple
        ``(graph, lib, params, ctx)``.

        - When ``args.original`` is truthy these are the values returned by
          ``relay.build`` for the unquantized network.
        - Otherwise the quantized build is saved under ``thisdir`` and read
          back: ``graph`` is the JSON text, ``lib`` the reloaded module and
          ``params`` a ``bytearray`` of the serialized parameters.
    """
    import tvm
    from tvm import relay
    from tvm.relay import quantize as qtz

    img_size = 224
    data_shape = (args.batch_size, 3, img_size, img_size)

    mx_sym, mx_args, mx_auxs = mx.model.load_checkpoint(args.model, 0)
    net, params = relay.frontend.from_mxnet(mx_sym, {"data": data_shape},
                                            arg_params=mx_args,
                                            aux_params=mx_auxs)
    target = args.target

    if args.original:
        # run original model
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(net, target, params=params)
        ctx = tvm.nd.context(target, 0)
        return graph, lib, params, ctx

    # constant folding and scale folding.
    # print('original')
    # print(net.astext(show_meta_data=False))
    with relay.build_config(opt_level=3):
        qgraph = relay.optimize(net, target, params)
        # print('after optimize')
        # print(qgraph.astext(show_meta_data=False))

    # NOTE(review): the weight settings reuse the input settings
    # (nbit_weight=args.nbit_input, dtype_weight=args.dtype_input) -- confirm
    # this is intentional.
    with qtz.qconfig(skip_k_conv=0,
                     nbit_input=args.nbit_input,
                     nbit_weight=args.nbit_input,
                     global_scale=args.global_scale,
                     dtype_input=args.dtype_input,
                     dtype_weight=args.dtype_input,
                     dtype_activation=args.dtype_output,
                     store_lowbit_output=False,
                     debug_enabled_ops=None):
        print(qtz.current_qconfig())
        qgraph = qtz.annotate(qgraph)
        # print('after annotate')
        # print(qgraph.astext(show_meta_data=False))
        qgraph = qtz.calibrate(qgraph)
        # print('after calibrate\n')
        # print(qgraph.astext(show_meta_data=False))
        if not args.simulated:
            qgraph = qtz.realize(qgraph)
            # NOTE(review): assumed to belong to the realize step.
            qgraph = relay.ir_pass.infer_type(qgraph)
            # print('after realize\n')
            # print(qgraph.astext(show_meta_data=False))

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(qgraph, target)

    ### save/load the graph, lib and params into separate files
    # save
    lib.export_library(os.path.join(thisdir, "deploy_lib.so"))
    with open(os.path.join(thisdir, "deploy_graph.json"), "w") as fo:
        fo.write(graph)
    with open(os.path.join(thisdir, "deploy_param.params"), "wb") as fo:
        fo.write(relay.save_param_dict(params))

    # load (inside ``with`` blocks so the file handles are closed promptly)
    with open(os.path.join(thisdir, "deploy_graph.json")) as fi:
        graph = fi.read()
    lib = tvm.module.load(os.path.join(thisdir, "deploy_lib.so"))
    with open(os.path.join(thisdir, "deploy_param.params"), "rb") as fi:
        params = bytearray(fi.read())
    ctx = tvm.nd.context(target, 0)
    return graph, lib, params, ctx