def torch2executor(model, inputs, target):
    prefix = f"mobilenetv2_tsm_tvm_{target}"
    lib_fname = f'{prefix}.tar'
    graph_fname = f'{prefix}.json'
    params_fname = f'{prefix}.params'
    if os.path.exists(lib_fname) and os.path.exists(graph_fname) and os.path.exists(params_fname):
        with open(graph_fname, 'rt') as f:
            graph = f.read()
        tvm_module = tvm.module.load(lib_fname)
        params = relay.load_param_dict(bytearray(open(params_fname, 'rb').read()))
    else:
        graph, tvm_module, params = torch2tvm_module(model, inputs, target)
        tvm_module.export_library(lib_fname)
        with open(graph_fname, 'wt') as f:
            f.write(graph)
        with open(params_fname, 'wb') as f:
            f.write(relay.save_param_dict(params))

    ctx = tvm.gpu() if target.startswith('cuda') else tvm.cpu()
    # graph json, tvm module and tvm context
    graph_module = graph_runtime.create(graph, tvm_module, ctx)
    for pname, pvalue in params.items():
        graph_module.set_input(pname, pvalue)

    def executor(inputs):
        for idx, value in enumerate(inputs):
            graph_module.set_input(idx, value)
        graph_module.run()
        return tuple(graph_module.get_output(idx) for idx in range(len(inputs)))

    return executor, ctx
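# Hedged usage sketch for torch2executor. `tsm_model` and `raw_inputs` are
# hypothetical placeholders (a TSM MobileNetV2 torch module and a tuple of
# numpy arrays matching its input signature); neither is defined in this snippet.
executor, ctx = torch2executor(tsm_model, raw_inputs, target='cuda')
tvm_inputs = tuple(tvm.nd.array(arr, ctx) for arr in raw_inputs)
outputs = executor(tvm_inputs)  # one output buffer per input, as in the executor above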
def import_graphdef(
    name,
    batch_size,
    seq_len,
    save_relay=True,
    relay_file="model.json",
    relay_params="model.params",
):
    abs_path = os.path.dirname(os.path.abspath(__file__))
    shape_dict = {"input_1": (batch_size, seq_len)}
    relay_file = ("%s_%d_%d_%s" % (name, batch_size, seq_len, relay_file)).replace("/", "_")
    relay_params = ("%s_%d_%d_%s" % (name, batch_size, seq_len, relay_params)).replace("/", "_")
    if os.path.exists(os.path.join(abs_path, relay_file)) and os.path.exists(
        os.path.join(abs_path, relay_params)
    ):
        with open(os.path.join(abs_path, relay_file), "r") as fi:
            mod = tvm.ir.load_json(fi.read())
        with open(os.path.join(abs_path, relay_params), "rb") as fi:
            params = relay.load_param_dict(fi.read())
    else:
        graph_def = download_model(name, batch_size, seq_len)
        mod, params = relay.frontend.from_tensorflow(graph_def, shape=shape_dict)
        if save_relay:
            with open(os.path.join(abs_path, relay_file), "w") as fo:
                fo.write(tvm.ir.save_json(mod))
            with open(os.path.join(abs_path, relay_params), "wb") as fo:
                fo.write(relay.save_param_dict(params))
    return mod, params, shape_dict
def deserialize_relay(json_path, params_path):
    with open(json_path, "r") as fi:
        mod = tvm.ir.load_json(fi.read())
    with open(params_path, "rb") as fi:
        params = relay.load_param_dict(fi.read())
    return mod, params
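# Self-contained round-trip sketch for deserialize_relay: build a tiny Relay
# module, save it with the matching tvm.ir.save_json / relay.save_param_dict
# calls, and read it back. The file names are illustrative only.
import numpy as np
import tvm
from tvm import relay

x = relay.var("x", shape=(1, 4), dtype="float32")
w = relay.var("w", shape=(4, 4), dtype="float32")
toy_mod = tvm.IRModule.from_expr(relay.Function([x, w], relay.nn.dense(x, w)))
toy_params = {"w": tvm.nd.array(np.random.uniform(size=(4, 4)).astype("float32"))}

with open("toy_model.json", "w") as fo:
    fo.write(tvm.ir.save_json(toy_mod))
with open("toy_model.params", "wb") as fo:
    fo.write(relay.save_param_dict(toy_params))

toy_mod2, toy_params2 = deserialize_relay("toy_model.json", "toy_model.params")
assert "w" in toy_params2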
def test_save_load():
    x = np.ones((10, 2)).astype("float32")
    y = np.ones((1, 2, 3)).astype("float32")
    params = {"x": x, "y": y}
    param_bytes = runtime.save_param_dict(params)
    assert isinstance(param_bytes, bytearray)
    param2 = relay.load_param_dict(param_bytes)
    assert len(param2) == 2
    np.testing.assert_equal(param2["x"].numpy(), x)
    np.testing.assert_equal(param2["y"].numpy(), y)
# Older-API variant of the same test: relay.save_param_dict and .asnumpy()
# instead of runtime.save_param_dict and .numpy().
def test_save_load():
    x = np.ones((10, 2)).astype("float32")
    y = np.ones((1, 2, 3)).astype("float32")
    params = {"x": x, "y": y}
    param_bytes = relay.save_param_dict(params)
    assert isinstance(param_bytes, bytearray)
    param2 = relay.load_param_dict(param_bytes)
    assert len(param2) == 2
    np.testing.assert_equal(param2["x"].asnumpy(), x)
    np.testing.assert_equal(param2["y"].asnumpy(), y)
def get_network(batch_size=1):
    # Load the exported graph definition and parameters.
    input_shape = (batch_size, 3, 300, 300)
    output_shape = (batch_size, 8732, 6)
    export_ssd_module = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_module.json")
    export_params = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_param.params")
    with open(export_ssd_module, "r") as f:
        module = tvm.ir.load_json(f.read())
    with open(export_params, "rb") as f:
        params = relay.load_param_dict(f.read())
    return module, params, input_shape, output_shape
def test_ndarray_reflection():
    # Make two `NDArrayWrapper`s that point to the same underlying array.
    np_array = np.random.uniform(size=(10, 2)).astype("float32")
    tvm_array = tvm.nd.array(np_array)
    param_dict = {'x': tvm_array, 'y': tvm_array}
    assert param_dict['x'].same_as(param_dict['y'])
    # Serialize then deserialize `param_dict`.
    deser_param_dict = relay.load_param_dict(relay.save_param_dict(param_dict))
    # Make sure the data matches the original data.
    np.testing.assert_equal(deser_param_dict['x'].asnumpy(), tvm_array.asnumpy())
    # Make sure `x` and `y` contain the same data.
    np.testing.assert_equal(deser_param_dict['x'].asnumpy(), deser_param_dict['y'].asnumpy())
def from_relay(model_path, shapes, outputs=None, opt_model_path=None):
    # type: (str, dict, Optional[List[str]], Optional[str]) -> Tuple[tvm.IRModule, dict]
    """Load a Relay model and its parameter dict from file."""
    with open(model_path, 'rb') as f:
        mod = tvm.ir.load_json(f.read())
    with open(opt_model_path, "rb") as f:
        params = relay.load_param_dict(bytearray(f.read()))
    return mod, params
def test_ndarray_reflection():
    # Make two `NDArrayWrapper`s that point to the same underlying array.
    np_array = np.random.uniform(size=(10, 2)).astype("float32")
    tvm_array = tvm.nd.array(np_array)
    param_dict = {"x": tvm_array, "y": tvm_array}
    assert param_dict["x"].same_as(param_dict["y"])
    # Serialize then deserialize `param_dict`.
    deser_param_dict = relay.load_param_dict(runtime.save_param_dict(param_dict))
    # Make sure the data matches the original data.
    np.testing.assert_equal(deser_param_dict["x"].numpy(), tvm_array.numpy())
    # Make sure `x` and `y` contain the same data.
    np.testing.assert_equal(deser_param_dict["x"].numpy(), deser_param_dict["y"].numpy())
def load_tvm(self, export_dir):
    """Load tvm module from export directory"""
    self.export_dir = export_dir
    self.tvm_lib = load_module(os.path.join(export_dir, TVM_ASSETS[0]))
    with open(os.path.join(export_dir, TVM_ASSETS[1]), "r", encoding="utf8") as f:
        self.tvm_graph = f.read()
    with open(os.path.join(export_dir, TVM_ASSETS[2]), "rb") as f:
        self.tvm_params = relay.load_param_dict(f.read())

    self.tvm_module = graph_executor.create(self.tvm_graph, self.tvm_lib, device=self.dev)
    self.tvm_module.set_input(**self.tvm_params)
    return self.tvm_module
def get_input_info(graph_str, params):
    """Return the 'shape' and 'dtype' dictionaries for the input
    tensors of a compiled module.

    .. note::
        We can't simply get the input tensors from a TVM graph
        because weight tensors are treated equivalently. Therefore, to
        find the input tensors we look at the 'arg_nodes' in the graph
        (which are either weights or inputs) and check which ones don't
        appear in the params (where the weights are stored). These nodes
        are therefore inferred to be input tensors.

    Parameters
    ----------
    graph_str : str
        JSON graph of the module serialized as a string.
    params : bytearray
        Params serialized as a bytearray.

    Returns
    -------
    shape_dict : dict
        Shape dictionary - {input_name: tuple}.
    dtype_dict : dict
        dtype dictionary - {input_name: dtype}.
    """
    shape_dict = {}
    dtype_dict = {}
    params_dict = load_param_dict(params)
    param_names = [k for (k, v) in params_dict.items()]
    graph = json.loads(graph_str)
    for node_id in graph["arg_nodes"]:
        node = graph["nodes"][node_id]
        # If a node is not in the params, infer it to be an input node.
        name = node["name"]
        if name not in param_names:
            shape_dict[name] = graph["attrs"]["shape"][1][node_id]
            dtype_dict[name] = graph["attrs"]["dltype"][1][node_id]

    logger.debug("collecting graph input shape and type:")
    logger.debug("graph input shape: %s", shape_dict)
    logger.debug("graph input type: %s", dtype_dict)
    return shape_dict, dtype_dict
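# Hedged, self-contained check of get_input_info: compile a tiny Relay module
# and confirm that only the true input (not the bound weight) is reported.
# All names below are illustrative.
import numpy as np
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 8), dtype="float32")
weight = relay.var("weight", shape=(4, 8), dtype="float32")
net = tvm.IRModule.from_expr(relay.Function([data, weight], relay.nn.dense(data, weight)))
weights = {"weight": tvm.nd.array(np.ones((4, 8), dtype="float32"))}

with tvm.transform.PassContext(opt_level=3):
    built = relay.build(net, target="llvm", params=weights)

shapes, dtypes = get_input_info(
    built.get_graph_json(), relay.save_param_dict(built.get_params()))
# Expect shapes == {"data": [1, 8]} and dtypes == {"data": "float32"}.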
def load_pt_model(name, path="./models/", relay_file="_pt_model.json",
                  relay_params="_pt_model.params"):
    with open(os.path.join(path, name + relay_file), "r") as fi:
        mod = tvm.ir.load_json(fi.read())
    with open(os.path.join(path, name + relay_params), "rb") as fi:
        params = relay.load_param_dict(fi.read())

    mod = tvm.relay.transform.FastMath()(mod)
    mod = tvm.relay.transform.EliminateCommonSubexpr()(mod)
    BindPass = tvm.relay.transform.function_pass(
        lambda fn, new_mod, ctx: tvm.relay.build_module.bind_params_by_name(fn, params),
        opt_level=1,
    )
    mod = BindPass(mod)
    mod = tvm.relay.transform.FoldConstant()(mod)
    mod = tvm.relay.transform.CombineParallelBatchMatmul()(mod)
    mod = tvm.relay.transform.FoldConstant()(mod)
    return mod, params, {"input_ids": [1, 128]}
def import_graphdef(
    name,
    batch_size,
    seq_len,
    save_relay=True,
    relay_file="model.json",
    relay_params="model.params",
):
    abs_path = os.path.dirname(os.path.abspath(__file__))
    shape_dict = {
        "input_ids": (batch_size, seq_len),
        "attention_mask": (batch_size, seq_len),
        "token_type_ids": (batch_size, seq_len),
    }
    shape_list = [
        ("input_ids", (batch_size, seq_len)),
        ("attention_mask", (batch_size, seq_len)),
        ("token_type_ids", (batch_size, seq_len)),
    ]
    relay_file = ("%s_%d_%d_%s" % (name, batch_size, seq_len, relay_file)).replace("/", "_")
    relay_params = ("%s_%d_%d_%s" % (name, batch_size, seq_len, relay_params)).replace("/", "_")
    if os.path.exists(os.path.join(abs_path, relay_file)) and os.path.exists(
        os.path.join(abs_path, relay_params)
    ):
        with open(os.path.join(abs_path, relay_file), "r") as fi:
            mod = tvm.ir.load_json(fi.read())
        with open(os.path.join(abs_path, relay_params), "rb") as fi:
            params = relay.load_param_dict(fi.read())
    else:
        scripted_model = load_pytorch_model(name, batch_size, seq_len)
        mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
        if save_relay:
            with open(os.path.join(abs_path, relay_file), "w") as fo:
                fo.write(tvm.ir.save_json(mod))
            with open(os.path.join(abs_path, relay_params), "wb") as fo:
                fo.write(relay.save_param_dict(params))
    return mod, params, shape_dict
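# Hedged compile sketch for the PyTorch import path above. The model name is
# illustrative, and load_pytorch_model must be able to fetch and script it.
bert_mod, bert_params, bert_shapes = import_graphdef("bert-base-uncased",
                                                     batch_size=1, seq_len=128)
with tvm.transform.PassContext(opt_level=3):
    bert_lib = relay.build(bert_mod, target="llvm", params=bert_params)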
def load(self, model_path: str):
    """Load a TVMCModel from disk.

    Parameters
    ----------
    model_path : str
        A path to load the TVMCModel from.
    """
    temp = self._tmp_dir
    t = tarfile.open(model_path)
    t.extractall(temp.relpath("."))

    # Load relay IR.
    relay_path = temp.relpath("model.json")
    with open(relay_path, "r") as relay_file:
        self.mod = tvm.ir.load_json(relay_file.read())

    # Load parameter dictionary.
    params_path = temp.relpath("model.params")
    with open(params_path, "rb") as params_file:
        self.params = relay.load_param_dict(params_file.read())
def test_accuracy(opts, target):
    with open(opts.module, 'rb') as handle:
        mod = pickle.load(handle)
    params_data = None
    if not opts.quantize:
        with open(opts.params, 'rb') as f_params:
            params_data = relay.load_param_dict(f_params.read())

    lib, graph, out_params = build(opts, mod=mod, params=params_data, target=target)
    ctx = tvm.context(target, 0)
    m = tvm.contrib.graph_runtime.create(graph, lib, ctx)

    # Get test data.
    num_of_samples = int(opts.test)
    test_data, test_label = get_dataset(num_of_samples)
    print(f'INFO: testing {num_of_samples} samples')

    # Evaluate the data.
    corrects = 0
    count = 0
    for test, label in zip(test_data, test_label):
        count += 1
        input_data = test.reshape((1, 49, 10))
        m.set_input('Mfcc', input_data)
        m.set_input(**out_params)
        m.run()
        predictions = m.get_output(0, tvm.nd.empty((1, 12), 'float32')).asnumpy()
        predictions = predictions[0]
        exp_ind = np.argmax(label)
        pred_ind = np.argmax(predictions)
        if pred_ind == exp_ind:
            corrects += 1

    acc = corrects / (num_of_samples * 1.0)
    print(f'Accuracy for {num_of_samples} samples: {acc}')
    return acc
def build_keyword_model(opts):
    from model.kws.kws import get_module, prepare_input
    model_input_name = 'Mfcc'
    shape_dict = {model_input_name: (1, 49, 10)}
    mod = get_module(opts.module)
    print(mod)

    params_data = None
    if opts.params:
        with open(opts.params, 'rb') as f_param:
            params_data = relay.load_param_dict(f_param.read())

    print("Compile...")
    if opts.tuned:
        history_file = opts.tuned
        print(f'INFO: tuning model with history file {history_file}!')
        with autotvm.apply_history_best(history_file):
            with relay.build_config(opt_level=3):
                graph, lib, out_params = relay.build_module.build(
                    mod, target=TARGET, params=params_data)
    else:
        print("INFO: no tuning!")
        with relay.build_config(opt_level=3):
            graph, lib, out_params = relay.build_module.build(
                mod, target=TARGET, params=params_data)

    # Save the model object, graph, and params.
    model_name = 'keyword'
    lib.save(os.path.join(build_dir, f'{model_name}_model.o'))
    print(f'INFO: {model_name}_model.o saved!')
    with open(os.path.join(build_dir, f'{model_name}_graph.bin'), 'wb') as f_graph:
        f_graph.write(bytes(graph, 'utf-8'))
    print(f'INFO: {model_name}_graph.bin saved!')
    with open(os.path.join(build_dir, f'{model_name}_graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    print(f'INFO: {model_name}_graph.json saved!')
    with open(os.path.join(build_dir, f'{model_name}_params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(out_params))
    print(f'INFO: {model_name}_params.bin saved!')

    # Create sample input and reference output using a local build.
    local_target = 'llvm --system-lib'
    with relay.build_config(opt_level=3):
        graph_test, lib_test, params_test = relay.build_module.build(
            mod, target=local_target)
    with open('build/graph.log', 'w') as f:
        f.write(str(graph))

    sample_file = 'python/model/kws/samples/silence.wav'
    input_data = prepare_input(sample_file)
    ctx = tvm.context(local_target, 0)
    m = tvm.contrib.graph_runtime.create(graph_test, lib_test, ctx)
    m.set_input('Mfcc', input_data)
    m.set_input(**params_test)
    m.run()
    predictions = m.get_output(0, tvm.nd.empty((1, 12), 'float32')).asnumpy()
    predictions = predictions[0]
    print(f'INFO: sample audio file used: {sample_file}')

    # Save the sample data and output.
    with open(os.path.join(build_dir, f'{model_name}_data.bin'), "wb") as fp:
        fp.write(input_data.astype(np.float32).tobytes())
    print(f'INFO: {model_name}_data.bin saved!')
    with open(os.path.join(build_dir, f'{model_name}_output.bin'), "wb") as fp:
        fp.write(predictions.astype(np.float32).tobytes())
    print(f'INFO: {model_name}_output.bin saved!')
    generate_id()
def image_preprocessing(image):
    image = np.array(image) - np.array([123., 117., 104.])
    image /= np.array([58.395, 57.12, 57.375])
    image = image.transpose((2, 0, 1))
    image = image[np.newaxis, :]  # (N, C, H, W)
    return image.astype('float32')

x = image_preprocessing(image)

# Build the pretrained model.
if os.path.exists(mod_fn):
    # If the compiled library exists, load it.
    graph = open(graph_fn).read()
    mod = tvm.module.load(mod_fn)
    params = relay.load_param_dict(open(params_fn, 'rb').read())
else:
    graph, mod, params = build_pretrained_model(name)
    # Save the compiled library.
    mod.export_library(mod_fn)
    with open(graph_fn, 'w') as f:
        f.write(graph)
    with open(params_fn, 'wb') as f:
        f.write(relay.save_param_dict(params))

'''
The compiled module has three parts: `graph` is a JSON string describing the
neural network, `mod` is a library containing all the compiled operators used
to run the inference, and `params` is a dictionary mapping parameter names to
weights.
'''
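# A minimal run sketch under the assumptions above; the input name 'data' is a
# guess at the pretrained model's input tensor and may differ per model.
from tvm.contrib import graph_runtime

ctx = tvm.cpu()
rt = graph_runtime.create(graph, mod, ctx)
rt.set_input(**params)
rt.set_input('data', tvm.nd.array(x))
rt.run()
out = rt.get_output(0).asnumpy()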