def add_model(cls, input_file="model.onnx", output_file="add.onnx"):
    """Insert a LeakyRelu node directly in front of the last Sigmoid node.

    The model is read from ``input_file``.  A new LeakyRelu (alpha=0.02) is
    given the inputs the last Sigmoid currently has, and that Sigmoid is then
    rewired to consume only the LeakyRelu output.  The edited graph is cleaned
    up, topologically sorted and written to ``output_file``.

    Raises:
        IndexError: if the graph contains no node with op type "Sigmoid".
    """
    graph = gs.import_onnx(onnx.load(input_file))

    # Pick the last node (in graph order) whose op type is Sigmoid.
    sigmoid_nodes = [candidate for candidate in graph.nodes if candidate.op == "Sigmoid"]
    sigmoid = sigmoid_nodes[-1]

    # The new LeakyRelu takes over whatever the Sigmoid is reading right now.
    leaky_out = gs.Variable('new_lrelu', dtype=np.float32)
    leaky_node = gs.Node(op="LeakyRelu",
                         inputs=sigmoid.inputs,
                         outputs=[leaky_out],
                         attrs={"alpha": 0.02})
    graph.nodes.append(leaky_node)

    # Disconnect the Sigmoid from its old inputs, then feed it the LeakyRelu output.
    sigmoid.inputs.clear()
    sigmoid.inputs = [leaky_out]

    # Remove unused nodes/tensors and topologically sort the graph before saving.
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
def onnx_change(onnx_path):
    """Overwrite the second input of two Gather nodes with 5 and re-save the model.

    Which pair of ``Gather_<id>`` nodes is patched depends on the batch marker
    found in ``onnx_path`` ('batch_2', 'batch_4', 'batch_8', 'batch_16'); when
    no marker is present the first pair is used.  The model file is
    overwritten in place.

    Per the original author's note, this patch was supplied by their mentor to
    work around TensorRT inference returning all zeros.
    """
    # (path marker, Gather node ids); checked in this order, first match wins.
    marker_to_ids = (
        ('batch_2', (2775, 2777)),
        ('batch_4', (2961, 2963)),
        ('batch_8', (3333, 3335)),
        ('batch_16', (4077, 4079)),
    )
    node_number = (2682, 2684)  # fallback when no marker matches
    for marker, ids in marker_to_ids:
        if marker in onnx_path:
            node_number = ids
            break

    target_names = (f"Gather_{node_number[0]}", f"Gather_{node_number[1]}")

    graph = gs.import_onnx(onnx.load(onnx_path))
    for node in graph.nodes:
        if node.name not in target_names:
            continue
        # Show the tensor before and after the overwrite.
        print(node.inputs[1])
        node.inputs[1].values = np.int64(5)
        print(node.inputs[1])

    onnx.save(gs.export_onnx(graph), onnx_path)
    print(f"[INFO] onnx修改完成, 保存在{onnx_path}.")
def modify_onnx(onnx_model_file):
    """Rewrite every ``GridSampler`` node as a ``TRT_PluginV2`` node, in place.

    For each node whose op is ``GridSampler`` the node attributes are replaced
    by the plugin fields ``name``, ``version``, ``namespace`` and ``data``,
    where ``data`` is a byte buffer packed as:

    * ``[c, h, w, h_g, w_g]`` as int64 (taken from the shapes of inputs 0 and 1),
    * ``[interpolationmode, paddingmode]`` as int32,
    * ``[aligncorners]`` as a one-byte bool,
    * ``[m_type]`` as int32 (0 when input 0 is float32, otherwise 1).

    The modified model overwrites ``onnx_model_file``.

    Args:
        onnx_model_file: Path of the ONNX model to load and overwrite.
    """
    graph = gs.import_onnx(onnx.load(onnx_model_file))
    assert (graph is not None)

    for node in graph.nodes:
        if node.op == 'GridSampler':
            # Both shapes are unpacked as 4-element sequences.
            _, c, h, w = node.inputs[0].shape
            _, h_g, w_g, _ = node.inputs[1].shape
            align_corners = node.attrs['aligncorners']
            inter_mode = node.attrs['interpolationmode']
            pad_mode = node.attrs['paddingmode']
            m_type = 0 if node.inputs[0].dtype == np.float32 else 1
            # np.bool_ is used instead of the np.bool alias, which NumPy 1.24
            # removed (AttributeError); the packed byte is the same.
            buffer = np.array([c, h, w, h_g, w_g], dtype=np.int64).tobytes('C') \
                + np.array([inter_mode, pad_mode], dtype=np.int32).tobytes('C') \
                + np.array([align_corners], dtype=np.bool_).tobytes('C') \
                + np.array([m_type], dtype=np.int32).tobytes('C')
            node.attrs = {
                'name': 'GridSampler',
                'version': '1',
                'namespace': "",
                'data': buffer
            }
            node.op = 'TRT_PluginV2'

    onnx.save(gs.export_onnx(graph), onnx_model_file)
def clamp_weights_onnx(onnx_input_fpath: str, onnx_output_fpath: str, min: float, max: float, ignore_nodes: List = None):
    """Clip the stored values of an ONNX model into ``[min, max]`` and save it.

    Two passes are made over the graph loaded from ``onnx_input_fpath``:
    every non-Variable tensor has its values clipped in place, and every node
    that carries a ``"value"`` attribute has that attribute's values clipped
    in place.  Tensors and nodes whose name appears in ``ignore_nodes`` are
    left untouched.  The result is written to ``onnx_output_fpath``.
    """
    graph = gs.import_onnx(onnx.load(onnx_input_fpath))

    skipped_names = set() if ignore_nodes is None else set(ignore_nodes)

    # Pass 1: tensors that hold values (Variables are skipped).
    for tensor in graph.tensors().values():
        is_variable = isinstance(tensor, gs.ir.tensor.Variable)
        if tensor.name in skipped_names or is_variable:
            continue
        np.clip(tensor.values, min, max, out=tensor.values)

    # Pass 2: values stored in a node's "value" attribute.
    for node in graph.nodes:
        value_attr = node.attrs.get("value", None)
        if node.name in skipped_names or value_attr is None:
            continue
        np.clip(value_attr.values, min, max, out=value_attr.values)

    onnx.save(gs.export_onnx(graph), onnx_output_fpath)
def test_reduce_with_constant(self):
    """Reduce a model containing Constant nodes using fallback shape inference.

    There should be no failure when such a model is reduced with
    ``--no-shape-inference``; Constant nodes will be lowered to constant
    tensors.
    """
    reduce_args = [
        "reduce",
        ONNX_MODELS["reducable_with_const"].path,
        "--no-shape-inference",
        "--mode=linear",
        "--output=reduced.onnx",
    ]
    check_args = [
        "--check",
        TestReduce.FAKE_REDUCE_CHECKER,
        "polygraphy_debug.onnx",
        "--fail-node",
        "onnx_graphsurgeon_node_3",
    ]

    with tempfile.TemporaryDirectory() as outdir:
        run_polygraphy_debug(reduce_args + check_args, disable_verbose=True, cwd=outdir)
        reduced_path = os.path.join(outdir, "reduced.onnx")
        graph = gs.import_onnx(onnx_from_path(reduced_path))

        assert len(graph.nodes) == 1
        assert graph.nodes[0].name == "onnx_graphsurgeon_node_3"
        # Outputs of Constant nodes should not become Variables; thus the
        # model should have no inputs.
        assert not graph.inputs
def post_process_packnet(model_file, opset=11):
    """
    Post-process the packnet ONNX model with ONNX graph surgeon, in place.

    Pad nodes are processed only when ``opset`` is 11; upsample and group
    normalization nodes are always processed.  Refer to post_processing.py
    for details.

    Args:
        model_file : Path to ONNX file (read, then overwritten)
        opset : Opset value forwarded to the upsample processing step
    """
    packnet_graph = gs.import_onnx(onnx.load(model_file))

    if opset == 11:
        packnet_graph = process_pad_nodes(packnet_graph)

    # Upsample subgraphs become a single node with input and scale factor;
    # group normalization subgraphs become a single plugin node.
    packnet_graph = process_groupnorm_nodes(
        process_upsample_nodes(packnet_graph, opset))

    # Drop unused nodes and restore topological order.
    packnet_graph.cleanup().toposort()

    exported_model = gs.export_onnx(packnet_graph)
    onnx.save_model(exported_model, model_file)

    print("Saving the ONNX model to {}".format(model_file))
def infer(self):
    """
    Clean, sort, shape-infer and constant-fold ``self.graph``, up to three times.

    Each pass removes unconnected nodes, re-sorts topologically, attempts
    ONNX shape inference (a failure there is logged and ignored) and folds
    constants including shapes.  Passes stop early once a pass leaves the
    node count unchanged.

    Raises:
        TypeError: re-raised when ``fold_constants`` rejects ``fold_shapes``.
    """
    max_passes = 3
    for _ in range(max_passes):
        nodes_before = len(self.graph.nodes)
        self.graph.cleanup().toposort()

        try:
            # Clear recorded output shapes before running shape inference.
            for graph_node in self.graph.nodes:
                for out_tensor in graph_node.outputs:
                    out_tensor.shape = None
            inferred_model = shape_inference.infer_shapes(gs.export_onnx(self.graph))
            self.graph = gs.import_onnx(inferred_model)
        except Exception as e:
            # Best effort: continue with the graph as it is.
            log.info(
                "Shape inference could not be performed at this time:\n{}".
                format(e))

        try:
            self.graph.fold_constants(fold_shapes=True)
        except TypeError as e:
            log.error(
                "This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your "
                "onnx_graphsurgeon module. Error:\n{}".format(e))
            raise

        if len(self.graph.nodes) == nodes_before:
            # Nothing was folded away in this pass, so further passes are pointless.
            break
def modify(input: str, output: str, downsample_ratio: float = 0.25) -> None:
    """Bake ``downsample_ratio`` into node ``Resize_3`` and save the model.

    The input of ``Resize_3`` named '388' is replaced by a constant
    ``[1, 1, downsample_ratio, downsample_ratio]`` (float32), the graph input
    named 'downsample_ratio' is removed, node ``Concat_2`` is disconnected
    from its outputs, and the cleaned-up graph is written to ``output``.

    Raises:
        IndexError: if ``Resize_3`` or ``Concat_2`` is not present.
    """
    print(f'\nonnx load: {input}')
    graph = gs.import_onnx(onnx.load(input))
    _print_graph(graph)

    # Replace the '388' input of Resize_3 with a fixed scales constant.
    resize_node = [node for node in graph.nodes if node.name == 'Resize_3'][0]
    print()
    print(resize_node)

    scale_values = np.asarray(
        [1, 1, downsample_ratio, downsample_ratio], dtype=np.float32)
    scales = gs.Constant('388', scale_values)
    resize_node.inputs = [
        scales if tensor.name == '388' else tensor
        for tensor in resize_node.inputs
    ]
    print()
    print(resize_node)

    # The ratio is now a constant, so it is no longer a graph input.
    graph.inputs = [
        tensor for tensor in graph.inputs if tensor.name != 'downsample_ratio'
    ]

    # Disconnect Concat_2 so that cleanup can discard it.
    concat_node = [node for node in graph.nodes if node.name == 'Concat_2'][0]
    concat_node.outputs.clear()

    graph.cleanup()
    onnx.save(gs.export_onnx(graph), output)
def modify_onnx(onnx_model_filepath="vot_opset_10.onnx",
                modified_onnx_model_filepath="vot_opset_10_modified.onnx"):
    """Modify an ONNX model to fix issues with running on TRT.

    Shape inference is run on the loaded model, remaining UINT8 tensors are
    fixed when any are counted, CombinedNMS is replaced by BatchedNMS, and the
    cleaned-up graph is saved.

    Args:
        onnx_model_filepath: Input onnx file path.
        modified_onnx_model_filepath: Output onnx file path.

    Raises:
        Exception: There are still uint8's that have not been converted.
    """
    loaded_model = onnx.load(onnx_model_filepath)
    graph = gs.import_onnx(onnx.shape_inference.infer_shapes(loaded_model))

    if count_uint8(graph=graph) > 0:
        print("Fixing UINT8 issues...")
        graph = fix_uint8_tensors(graph=graph)
        # Re-count to verify that the fix removed every UINT8 occurrence.
        remaining = count_uint8(graph=graph)
        if remaining > 0:
            raise Exception("UINT8 issue has not been fixed!")
        print("UINT8 issue has been fixed!")

    print("Replacing CombinedNMS to BatchedNMS...")
    graph = replace_combined_nms(graph=graph)
    exported_model = gs.export_onnx(graph.cleanup())
    onnx.save(exported_model, modified_onnx_model_filepath)
    print("CombinedNMS has been replaced to BatchedNMS!")
def __init__(
    self,
    checkpoint_path: str,
    *,
    version: str = "r6.0",
    input_sample: Optional[Tensor] = None,
    enable_dynamic: bool = False,
    device: torch.device = torch.device("cpu"),
    precision: str = "fp32",
):
    """Export a checkpoint to ONNX and load it as a GraphSurgeon graph.

    The checkpoint is wrapped in ``YOLOTRTInference``, exported to an ONNX
    file next to the checkpoint (same path, ``.onnx`` suffix), imported into
    ``self.graph`` and constant-folded.  ``self.num_classes`` is taken from
    the model, ``self.batch_size`` is set to 1 and ``self.precision`` stores
    the ``precision`` argument.

    Raises:
        AssertionError: if ``checkpoint_path`` does not exist or the imported
            graph is falsy.
    """
    checkpoint_path = Path(checkpoint_path)
    assert checkpoint_path.exists()

    # Use YOLOTRTInference to convert saved model to an initial ONNX graph.
    model = YOLOTRTInference(checkpoint_path, version=version).eval().to(device=device)
    logger.info(f"Loaded saved model from {checkpoint_path}")

    if input_sample is not None:
        input_sample = input_sample.to(device=device)

    onnx_model_path = checkpoint_path.with_suffix(".onnx")
    model.to_onnx(onnx_model_path, input_sample=input_sample, enable_dynamic=enable_dynamic)

    self.graph = gs.import_onnx(onnx.load(onnx_model_path))
    assert self.graph
    logger.info("PyTorch2ONNX graph created successfully")

    # Fold constants via ONNX-GS that PyTorch2ONNX may have missed
    self.graph.fold_constants()

    self.num_classes = model.num_classes
    self.batch_size = 1
    self.precision = precision
def modeify_model2(cls, input_file="model.onnx", output_file="add.onnx"):
    """Re-implement the resize step between the first LeakyRelu and first MaxPool.

    The first LeakyRelu node is disconnected from its outputs and the first
    MaxPool node from its inputs.  A new LeakyRelu (alpha=0.1) reading the old
    LeakyRelu's inputs is added, followed by a new nearest-mode Resize node
    with scales ``[1, 1, 2, 2]``; the MaxPool is then fed the Resize output.
    The graph is cleaned up, sorted and saved to ``output_file``.

    Raises:
        IndexError: if the graph has no LeakyRelu or no MaxPool node.
    """
    graph = gs.import_onnx(onnx.load(input_file))

    # First LeakyRelu: clearing its outputs removes the downstream links, so a
    # replacement producing a fresh output tensor is created below.
    old_leaky = [n for n in graph.nodes if n.op == "LeakyRelu"][0]
    old_leaky.outputs.clear()

    # First MaxPool: clear its inputs now; they are re-assigned at the end.
    max_pool = [n for n in graph.nodes if n.op == "MaxPool"][0]
    max_pool.inputs.clear()

    # Replacement LeakyRelu consuming the old node's inputs.
    lrelu = gs.Variable("new_lrelu", np.float32)
    leaky_node = gs.Node(op="LeakyRelu",
                         inputs=old_leaky.inputs,
                         outputs=[lrelu],
                         attrs={"alpha": 0.1})
    graph.nodes.append(leaky_node)

    # New Resize layer (used as upsample).  The name must not collide with an
    # existing Resize node.
    layer_name = "new_resize"
    resize_attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    scale_values = np.array([1.0, 1.0, 2, 2]).astype(np.float32)
    scale = gs.Constant(layer_name + ".scale", scale_values)
    roi = gs.Constant(layer_name + ".roi", np.asarray([0, 0, 0, 0], np.float32))
    resize = gs.Variable(layer_name, dtype=np.float32)
    resize_node = gs.Node(op="Resize",
                          inputs=[lrelu, roi, scale],
                          outputs=[resize],
                          attrs=resize_attrs)
    graph.nodes.append(resize_node)

    # Hook the MaxPool up to the new Resize output.
    max_pool.inputs = [resize]

    # Remove unused nodes/tensors, and topologically sort the graph.
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
def test_cast_elision(self):
    """After constant folding, the shape-cast-elision model has no Cast nodes."""
    folded = gs.import_onnx(shape_cast_elision().load()).fold_constants()
    remaining_ops = [node.op for node in folded.nodes]
    assert all(op != "Cast" for op in remaining_ops)
def test_reduce_shape_inference(self, opts):
    """Reducing with explicit input shapes yields (1, 2, 5, 5) input and output.

    ``opts`` is a list of extra command-line options spliced in before the
    ``--check`` arguments.
    """
    base_args = [
        "reduce",
        ONNX_MODELS["dynamic_identity"].path,
        "--output=reduced.onnx",
        "--show-output",
        "--model-input-shapes=X:[1,2,5,5]",
    ]
    expected_shape = (1, 2, 5, 5)

    with tempfile.TemporaryDirectory() as outdir:
        run_polygraphy_debug(base_args + opts + ["--check", "false"],
                             disable_verbose=True,
                             cwd=outdir)
        reduced_model = onnx_from_path(os.path.join(outdir, "reduced.onnx"))
        graph = gs.import_onnx(reduced_model)

        assert tuple(graph.inputs[0].shape) == expected_shape
        assert tuple(graph.outputs[0].shape) == expected_shape
def test_force_fallback_shape_inference_will_override_model_shapes(self):
    """Forcing fallback shape inference during extract fixes all tensor shapes."""
    expected_shape = (1, 2, 1, 1)

    with tempfile.NamedTemporaryFile() as outmodel:
        extract_args = [
            "extract",
            ONNX_MODELS["dynamic_identity"].path,
            "-o",
            outmodel.name,
            "--outputs",
            "Y:auto",
            "--force-fallback-shape-inference",
        ]
        run_polygraphy_surgeon(extract_args)
        onnx_model_sanity_check(outmodel.name)

        graph = gs.import_onnx(onnx.load(outmodel.name))

        # Inputs should become fixed since fallback shape inference is being forced.
        for tensor in graph.tensors().values():
            assert tensor.shape is not None

        assert tuple(graph.inputs[0].shape) == expected_shape
        assert tuple(graph.outputs[0].shape) == expected_shape
def __init__(self, onnx_model_file, req_jsons, dynamic_batch):
    """Load the ONNX model, import it as a graph and validate the requests.

    ``self.onnx_model_fixed_file`` is the text of ``onnx_model_file`` before
    its first '.onnx' followed by '_tuned.onnx'.  ``self._validate_requests()``
    is called last, after all attributes are set.
    """
    self.onnx_model_file = onnx_model_file
    # Everything before the first '.onnx' occurrence, plus the new suffix.
    stem = self.onnx_model_file.partition('.onnx')[0]
    self.onnx_model_fixed_file = stem + '_tuned.onnx'

    self.onnx_model = onnx.load(onnx_model_file)
    self.onnx_model_fixed = None

    self.req_jsons = req_jsons
    self.req_json_dicts = []

    self.graph = gs.import_onnx(self.onnx_model)
    self.dynamic_batch = dynamic_batch

    self._validate_requests()
def test_extract_onnx_gs_graph(self, extract_model):
    """extract_subgraph on a GraphSurgeon graph returns a one-node gs.Graph."""
    model, input_meta, output_meta = extract_model
    subgraph = extract_subgraph(gs.import_onnx(model), input_meta, output_meta)

    assert isinstance(subgraph, gs.Graph)
    assert len(subgraph.nodes) == 1

    assert len(subgraph.inputs) == 1
    assert subgraph.inputs[0].name == "X"

    assert len(subgraph.outputs) == 1
    assert subgraph.outputs[0].name == "identity_out_0"
def check_model(self, model):
    """Assert that ``model`` is a single-node graph from "X" to "identity_out_0".

    The input must carry both a shape and a dtype; the output must carry a
    dtype.
    """
    graph = gs.import_onnx(model)
    assert len(graph.nodes) == 1

    assert len(graph.inputs) == 1
    model_input = graph.inputs[0]
    assert model_input.name == "X"
    assert model_input.shape is not None
    assert model_input.dtype is not None

    assert len(graph.outputs) == 1
    model_output = graph.outputs[0]
    assert model_output.name == "identity_out_0"
    assert model_output.dtype is not None
def main():
    """Add the batched NMS plugin node to an ONNX model and save the result.

    Command-line options: ``-f/--model``, ``-t/--topK``, ``-k/--keepTopK``;
    unrecognised arguments are ignored.  The output file is the model path
    with ".nms.onnx" appended.
    """
    parser = argparse.ArgumentParser(description="Add batchedNMSPlugin")
    parser.add_argument(
        "-f",
        "--model",
        help="Path to the ONNX model generated by export_model.py",
        default="yolov4_1_3_416_416.onnx",
    )
    parser.add_argument(
        "-t",
        "--topK",
        help="number of bounding boxes for nms",
        default=2000,
    )
    parser.add_argument(
        "-k",
        "--keepTopK",
        help="bounding boxes to be kept per image",
        default=1000,
    )
    args, _ = parser.parse_known_args()

    source_graph = gs.import_onnx(onnx.load(args.model))
    # Values given on the command line arrive as strings, hence the int() casts.
    patched_graph = create_and_add_plugin_node(source_graph, int(args.topK), int(args.keepTopK))

    output_path = args.model + ".nms.onnx"
    onnx.save(gs.export_onnx(patched_graph), output_path)
def infer_model(path):
    """Run the ONNX model at ``path`` once on random inputs with onnxruntime.

    Each graph input is fed uniform random samples shaped and typed like the
    input tensor.  All graph outputs are fetched, logged and returned.
    """
    model = onnx.load(path)
    graph = gs.import_onnx(model)

    # One random array per graph input, cast to the input's dtype.
    feed_dict = {
        tensor.name: np.random.random_sample(size=tensor.shape).astype(tensor.dtype)
        for tensor in graph.inputs
    }
    output_names = [tensor.name for tensor in graph.outputs]

    session = onnxruntime.InferenceSession(model.SerializeToString())
    outputs = session.run(output_names, feed_dict)

    G_LOGGER.info("Inference outputs: {:}".format(outputs))
    return outputs
def test_no_load_constants(self):
    """Constant folding must leave the graph's lazily-loaded constants unloaded."""
    graph = gs.import_onnx(const_foldable().load())
    new_graph = graph.fold_constants()

    def check_no_const_loaded(graph):
        still_lazy = [
            tensor for tensor in graph.tensors().values()
            if isinstance(tensor, Constant) and isinstance(tensor._values, LazyValues)
        ]
        # Graph starts with 3 constants - none should be loaded.
        assert len(still_lazy) == 3

    check_no_const_loaded(graph)
    check_no_const_loaded(new_graph)
def __init__(self, saved_model_path, legacy_plugins=False):
    """
    Constructor of the EfficientDet Graph Surgeon object, to do the conversion of an EfficientDet TF saved model
    to an ONNX-TensorRT parsable model.

    The saved model is converted with tf2onnx (opset 11), imported into ``self.graph`` and constant-folded.
    ``self.api`` is set to "AutoML" or "TFOD" depending on which name pattern is found among the node names.

    :param saved_model_path: The path pointing to the TensorFlow saved model to load.
    :param legacy_plugins: If using TensorRT version < 8.0.1, set this to True to use older (but slower) plugins.
    """
    saved_model_path = os.path.realpath(saved_model_path)
    assert os.path.exists(saved_model_path)

    # Use tf2onnx to convert saved model to an initial ONNX graph.
    graph_def, inputs, outputs = tf_loader.from_saved_model(
        saved_model_path, None, None, "serve", ["serving_default"])
    log.info("Loaded saved model from {}".format(saved_model_path))

    with tf.Graph().as_default() as tf_graph:
        tf.import_graph_def(graph_def, name="")
    with tf_loader.tf_session(graph=tf_graph):
        onnx_graph = tfonnx.process_tf_graph(
            tf_graph, input_names=inputs, output_names=outputs, opset=11)

    optimized_graph = optimizer.optimize_graph(onnx_graph)
    onnx_model = optimized_graph.make_model("Converted from {}".format(saved_model_path))

    self.graph = gs.import_onnx(onnx_model)
    assert self.graph
    log.info("TF2ONNX graph created successfully")

    # Fold constants via ONNX-GS that TF2ONNX may have missed
    self.graph.fold_constants()

    # Try to auto-detect by finding if nodes match a specific name pattern expected for either of the APIs.
    node_names = [node.name for node in self.graph.nodes]
    if any("class_net/" in name for name in node_names):
        self.api = "AutoML"
    elif any("/WeightSharedConvolutionalClassHead/" in name for name in node_names):
        self.api = "TFOD"
    else:
        self.api = None
    assert self.api
    log.info("Graph was detected as {}".format(self.api))

    self.batch_size = None
    self.legacy_plugins = legacy_plugins
def resize_model(cls, input_file="model.onnx", output_file="subgraph.onnx"):
    """Truncate the model so that it ends at the last Sigmoid node.

    The first output tensor of the last Sigmoid becomes the only graph output
    (converted to a float32 variable of shape (1, 8)); the graph is cleaned up
    and saved to ``output_file``.

    Raises:
        IndexError: if the graph contains no node with op type "Sigmoid".
    """
    graph = gs.import_onnx(onnx.load(input_file))

    # Last node (in graph order) with op type Sigmoid.
    sigmoid = [node for node in graph.nodes if node.op == "Sigmoid"][-1]

    new_output = sigmoid.outputs[0].to_variable(np.float32, shape=(1, 8))
    graph.outputs = [new_output]

    graph.cleanup()
    onnx.save(gs.export_onnx(graph), output_file)
def main():
    """Run ``process_graph`` on the input ONNX model and save the result.

    Command-line options: ``-i/--input`` and ``-o/--output``; unrecognised
    arguments are ignored.
    """
    parser = argparse.ArgumentParser(
        description="Insert DCNv2 plugin node into ONNX model")
    parser.add_argument(
        "-i",
        "--input",
        help="Path to ONNX model with 'Plugin' node to replace with DCNv2_TRT",
        default="models/centertrack_DCNv2_named.onnx",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Path to output ONNX model with 'DCNv2_TRT' node",
        default="models/modified.onnx",
    )
    args, _ = parser.parse_known_args()

    source_graph = gs.import_onnx(onnx.load(args.input))
    processed_graph = process_graph(source_graph)
    onnx.save(gs.export_onnx(processed_graph), args.output)
def main():
    """Modify the original BiDAF ONNX model and save it as "bidaf-modified.onnx".

    The graph is passed through ``drop_category_mapper_nodes``,
    ``replace_unsupported_ops`` and ``save_weights_for_refitting`` in that
    order, exported, checked with the ONNX checker and then saved.
    """
    modified_model_name = "bidaf-modified.onnx"
    org_model_file_path = getFilePath('samples/python/engine_refit_onnx_bidaf/bidaf-original.onnx')

    print("Modifying the ONNX model ...")
    graph = gs.import_onnx(onnx.load(org_model_file_path))

    # Each step operates on the graph in place; return values are not used.
    for graph_step in (drop_category_mapper_nodes, replace_unsupported_ops, save_weights_for_refitting):
        graph_step(graph)

    new_model = gs.export_onnx(graph)
    onnx.checker.check_model(new_model)
    onnx.save(new_model, modified_model_name)

    print("Modified ONNX model saved as {}".format(modified_model_name))
    print("Done.")
def remove_model(cls, input_file="model.onnx", output_file="removed.onnx"):
    """Bypass the last Sigmoid node so that cleanup can remove it.

    The last Sigmoid is disconnected from its outputs and the last Softmax is
    rewired to read the Sigmoid's inputs directly.  The graph is then cleaned
    up, topologically sorted and saved to ``output_file``.

    Raises:
        IndexError: if the graph has no Sigmoid or no Softmax node.
    """
    graph = gs.import_onnx(onnx.load(input_file))

    # Last Sigmoid node: cut it off from everything downstream.
    sigmoid = [node for node in graph.nodes if node.op == "Sigmoid"][-1]
    sigmoid.outputs.clear()

    # Last Softmax node: clear its inputs first, then point it at the tensors
    # that used to feed the Sigmoid.
    softmax = [node for node in graph.nodes if node.op == "Softmax"][-1]
    softmax.inputs.clear()
    softmax.inputs = sigmoid.inputs

    # Remove the now-disconnected node from the graph completely.
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
def modify_onnx(onnx_model_filepath="vot_opset_10.onnx",
                modified_onnx_model_filepath="vot_opset_10_modified.onnx"):
    """Fix UINT8 tensors and replace CombinedNMS with BatchedNMS, then save.

    Args:
        onnx_model_filepath: Input onnx file path.
        modified_onnx_model_filepath: Output onnx file path.

    Raises:
        Exception: UINT8 tensors are still counted after the fix was applied.
    """
    loaded_model = onnx.load(onnx_model_filepath)
    graph = gs.import_onnx(onnx.shape_inference.infer_shapes(loaded_model))

    if count_uint8(graph=graph) > 0:
        print("Fixing UINT8 issues...")
        graph = fix_uint8_tensors(graph=graph)
        # Re-count to verify that the fix removed every UINT8 occurrence.
        remaining = count_uint8(graph=graph)
        if remaining > 0:
            raise Exception("UINT8 issue has not been fixed!")
        print("UINT8 issue has been fixed!")

    print("Replacing CombinedNMS to BatchedNMS...")
    graph = replace_combinedNMS(graph=graph)
    exported_model = gs.export_onnx(graph.cleanup())
    onnx.save(exported_model, modified_onnx_model_filepath)
    print("CombinedNMS has been replaced to BatchedNMS!")
node7 = gs.Node("Conv", "Conv1", inputs=[tensorLoop, constant1x32], outputs=[tensor7]) graphNodeList.append(node7) graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor7], opset=13) onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile0) print("Succeeded building %s!" % (onnxFile0)) # 修改 .onnx graph = gs.import_onnx(onnx.load(onnxFile0)) constant32r = gs.Constant( "constant32r", np.ascontiguousarray( np.random.rand(1, nC, 1, 1).reshape(1, nC, 1, 1).astype(np.float32) * 2 - 1)) for node in graph.nodes: if node.op in ['Unsqueeze', 'Squeeze']: node.o().inputs[0] = node.inputs[0] if node.op == 'Transpose': if node.o().op == 'Add': node.o().inputs[1] = constant32r node.o().inputs[0] = node.inputs[0]
conv_weights_transposed = np.transpose(conv_weights_tensor.values, [3, 2, 0, 1]) conv_weights_tensor.values = conv_weights_transposed # Remove the transpose nodes after the dequant node. TensorRT does not support transpose nodes after QDQ nodes. dequant_node_output = node.i(1).i(0).outputs[0] node.inputs[1] = dequant_node_output # Remove unused nodes, and topologically sort the graph. return graph.cleanup().toposort() if __name__ == '__main__': parser = argparse.ArgumentParser( "Post process ONNX graph by removing transpose nodes") parser.add_argument("--input", required=True, help="Input onnx graph") parser.add_argument("--output", default='postprocessed_rn50.onnx', help="Name of post processed onnx graph") args = parser.parse_args() # Load the rn50 graph graph = gs.import_onnx(onnx.load(args.input)) # Remove the transpose nodes and reshape the convolution weights graph = process_transpose_nodes(graph) # Export the onnx graph from graphsurgeon onnx_model = gs.export_onnx(graph) print("Output ONNX graph generated: ", args.output) onnx.save_model(onnx_model, args.output)
def main():
    """Benchmark a small TF1 dense/one_hot graph against its TensorRT build.

    Steps, in order:

    1. Build and run a TF graph (dense -> one_hot -> reshape -> dense) for
       ``iterations`` runs and print the average time per run.
    2. Freeze the graph to "test_op.pb" and convert it to "test_op.onnx" by
       invoking ``tf2onnx.convert`` through the shell.
    3. Rewrite the ONNX graph with ``modify_onehot`` and save it as
       "test_op_onehot.onnx".
    4. Parse that model with TensorRT, build an engine, run it for
       ``iterations`` runs and print the average time per run.
    5. Print, per output, whether TF and TensorRT agree within ``atol=1e-5``
       together with the max and min absolute difference.

    The process exits with status -1 when the engine cannot be built.  The
    CUDA context pushed for the TensorRT part is popped on every exit path.
    """
    tf.set_random_seed(1234)
    np.random.seed(0)
    iterations = 100
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        batch_size = 16
        input_data = np.random.rand(batch_size, 256).astype(np.float32)
        input_ph = tf.placeholder(dtype=tf.float32,
                                  shape=[batch_size, 256],
                                  name="input")
        x = tf.layers.dense(input_ph, 256)

        # test one_hot: dense activations are clipped into [0, depth - 1] and
        # cast to int32 to serve as one_hot indices.
        depth = 256
        indices = tf.cast(tf.clip_by_value(tf.reshape(x, [-1]), 0, depth - 1),
                          tf.int32)
        x = tf.one_hot(indices, depth)
        x = tf.reshape(x, [batch_size, -1])
        x = tf.layers.dense(x, 256)
        output = tf.identity(x, name="output")

        sess.run(tf.global_variables_initializer())

        # Time the TF graph; tf_result keeps the outputs of the last run.
        a = datetime.now()
        for i in range(iterations):
            tf_result = sess.run([output], {input_ph: input_data})
        b = datetime.now()
        time_sum = (b - a).total_seconds()
        tf_time = "[INFO] TF execution time " + str(
            time_sum * 1000 / iterations) + " ms"
        print(tf_time)

        output_name_without_port = ["output"]
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_name_without_port)
        # save frozen model
        with open("test_op.pb", "wb") as ofile:
            ofile.write(frozen_graph.SerializeToString())

        model_file = "test_op.onnx"
        # The command is a fixed string; no external input is interpolated.
        os.system(
            "python3 -m tf2onnx.convert --input test_op.pb --inputs input:0 --outputs output:0 --output test_op.onnx --verbose --opset 11"
        )

        ### use ONNX GraphSurgeon
        # ONNX operator is required to keep aligned (like name, inputs, outputs and attributes) with TensorRT plugin to use Fallback mechanism.
        # ONNX GraphSurgeon is useful for modification and you can install it by the following commands.
        # pip install nvidia-pyindex
        # pip install onnx-graphsurgeon
        graph = gs.import_onnx(onnx.load(model_file))
        graph = modify_onehot(graph)
        model_file = "test_op_onehot.onnx"
        onnx.save(gs.export_onnx(graph), model_file)

        # build trt model by onnx model
        cuda.Device(0).make_context()
        try:
            with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
                    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
            ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
                builder.max_batch_size = batch_size
                with open(model_file, 'rb') as model:
                    # parse onnx model and report any parser errors
                    parser.parse(model.read())
                    for i in range(parser.num_errors):
                        print(parser.get_error(i))

                engine = builder.build_engine(network,
                                              builder.create_builder_config())
                if engine is None:
                    print("[ERROR] engine is None")
                    exit(-1)

                inputs, outputs, bindings, stream = allocate_buffers(engine)
                with engine.create_execution_context() as context:
                    input_data = input_data.ravel()
                    np.copyto(inputs[0].host, input_data)

                    # Time the TensorRT engine; the host-side input copy is
                    # part of every timed iteration.
                    a = datetime.now()
                    for i in range(iterations):
                        np.copyto(inputs[0].host, input_data)
                        output = do_inference(context,
                                              bindings=bindings,
                                              inputs=inputs,
                                              outputs=outputs,
                                              stream=stream,
                                              batch_size=batch_size)
                    b = datetime.now()
                    time_sum = (b - a).total_seconds()
                    trt_time = ("TRT execution time " +
                                str(time_sum * 1000 / iterations) + " ms")
                    trt_result = output

            # Cross-check the last TF outputs against the last TRT outputs.
            for i, trt_out in enumerate(trt_result):
                tf_flat = tf_result[i].flatten()
                print("trt cross_check output_%d " % i +
                      str(np.allclose(tf_flat, trt_out, atol=1e-5)))
                print("max diff " + str(np.fabs(tf_flat - trt_out).max()))
                print("min diff " + str(np.fabs(tf_flat - trt_out).min()))

            print(tf_time)
            print(trt_time)
        finally:
            # Release the context pushed by make_context(), including on the
            # exit(-1) path and when an exception propagates.
            cuda.Context.pop()
output_names = ['y'] torch.onnx.export( custom, (x, grid), src_onnx, input_names=input_names, output_names=output_names, opset_version=11, verbose=True, operator_export_type=torch.onnx.OperatorExportTypes.ONNX_FALLTHROUGH, do_constant_folding=False) import onnx_graphsurgeon as gs import onnx import numpy as np graph = gs.import_onnx(onnx.load(src_onnx)) for node in graph.nodes: if node.op == 'Resize': # actually not used in this sample node_concat = node.i(2, 0) node_concat.i(0, 0).attrs['value'] = gs.Constant( '', np.concatenate((node_concat.i(0, 0).attrs['value'].values, node_concat.i(1, 0).attrs['value'].values))) node.inputs[2] = node_concat.inputs[0] node_concat.outputs.clear() if node.op == 'Clip': node_cast0 = node.i(1, 0) node_cast1 = node.i(2, 0)