def modify_onnx(onnx_model_filepath="vot_opset_10.onnx",
                modified_onnx_model_filepath="vot_opset_10_modified.onnx"):
    """Modifies an ONNX model to fix issues with running on TensorRT.

    Args:
        onnx_model_filepath: Input ONNX file path.
        modified_onnx_model_filepath: Output ONNX file path.

    Raises:
        Exception: There are still uint8 tensors that have not been converted.
    """
    orig_model = onnx.load(onnx_model_filepath)
    inferred_model = onnx.shape_inference.infer_shapes(orig_model)
    graph = gs.import_onnx(inferred_model)

    if count_uint8(graph=graph) > 0:
        print("Fixing UINT8 issues...")
        graph = fix_uint8_tensors(graph=graph)
        if count_uint8(graph=graph) > 0:
            raise Exception("UINT8 issue has not been fixed!")
        else:
            print("UINT8 issue has been fixed!")

    print("Replacing CombinedNMS with BatchedNMS...")
    graph = replace_combined_nms(graph=graph)

    onnx.save(gs.export_onnx(graph.cleanup()), modified_onnx_model_filepath)
    print("CombinedNMS has been replaced with BatchedNMS!")
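# The helpers count_uint8 / fix_uint8_tensors / replace_combined_nms are
# defined elsewhere in the source script. A minimal sketch of count_uint8,
# assuming it simply counts graph tensors that still carry a uint8 dtype
# (the name and behavior here are assumptions):
def count_uint8(graph):
    return sum(1 for t in graph.tensors().values() if t.dtype == np.uint8)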
def add_model(cls, input_file="model.onnx", output_file="add.onnx"):
    """Add a node: insert a LeakyRelu node before the Sigmoid."""
    graph = gs.import_onnx(onnx.load(input_file))

    # Find the last Sigmoid node.
    first_add = [node for node in graph.nodes if node.op == "Sigmoid"][-1]
    # first_add.inputs = [inp for inp in first_add.inputs if inp.name == "fc"]  # keep only the input named "fc"
    # first_add.inputs = [inp for inp in first_add.inputs]                      # keep all of its inputs
    # first_add.inputs = [inp for inp in first_add.inputs if inp.name != "b"]   # drop the input named "b"

    # Insert a LeakyRelu node that consumes the Sigmoid's current inputs.
    lrelu = gs.Variable('new_lrelu', dtype=np.float32)
    graph.nodes.append(
        gs.Node(op="LeakyRelu",
                inputs=first_add.inputs,
                outputs=[lrelu],
                attrs={"alpha": 0.02}))

    # The Sigmoid's input now becomes the LeakyRelu output.
    first_add.inputs.clear()
    first_add.inputs = [lrelu]

    # Remove unused nodes/tensors, and topologically sort the graph.
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
def onnx_change(onnx_path):
    """This code was provided by my advisor; it fixes the problem of TensorRT
    inference outputs being all zeros. Thanks!!!
    """
    node_configs = [(2682, 2684), (2775, 2777), (2961, 2963), (3333, 3335),
                    (4077, 4079)]
    if 'batch_2' in onnx_path:
        node_number = node_configs[1]
    elif 'batch_4' in onnx_path:
        node_number = node_configs[2]
    elif 'batch_8' in onnx_path:
        node_number = node_configs[3]
    elif 'batch_16' in onnx_path:
        node_number = node_configs[4]
    else:
        node_number = node_configs[0]

    graph = gs.import_onnx(onnx.load(onnx_path))
    for node in graph.nodes:
        if node.name == f"Gather_{node_number[0]}":
            print(node.inputs[1])
            node.inputs[1].values = np.int64(5)
            print(node.inputs[1])
        elif node.name == f"Gather_{node_number[1]}":
            print(node.inputs[1])
            node.inputs[1].values = np.int64(5)
            print(node.inputs[1])

    onnx.save(gs.export_onnx(graph), onnx_path)
    print(f"[INFO] ONNX modification finished, saved to {onnx_path}.")
def modify(input: str, output: str, downsample_ratio: float = 0.25) -> None:
    print(f'\nonnx load: {input}')
    graph = gs.import_onnx(onnx.load(input))
    _print_graph(graph)

    # Update node Resize_3: replace its scales input.
    resize_3 = [n for n in graph.nodes if n.name == 'Resize_3'][0]
    print()
    print(resize_3)

    scales = gs.Constant(
        '388',
        np.asarray([1, 1, downsample_ratio, downsample_ratio],
                   dtype=np.float32))
    resize_3.inputs = [
        i if i.name != '388' else scales for i in resize_3.inputs
    ]
    print()
    print(resize_3)

    # Remove the input downsample_ratio.
    graph.inputs = [i for i in graph.inputs if i.name != 'downsample_ratio']

    # Remove node Concat_2.
    concat_2 = [n for n in graph.nodes if n.name == 'Concat_2'][0]
    concat_2.outputs.clear()

    # Remove unused nodes/tensors.
    graph.cleanup()
    onnx.save(gs.export_onnx(graph), output)
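# _print_graph is not defined in this snippet. A minimal sketch, assuming it
# only dumps the graph's inputs, outputs, and node count for inspection:
def _print_graph(graph):
    print(f'inputs:  {graph.inputs}')
    print(f'outputs: {graph.outputs}')
    print(f'nodes:   {len(graph.nodes)}')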
def clamp_weights_onnx(onnx_input_fpath: str, onnx_output_fpath: str,
                       min: float, max: float, ignore_nodes: List = None):
    """
    Clamps the weights of the given ONNX model to the targeted upper and lower bounds.
    """
    graph = gs.import_onnx(onnx.load(onnx_input_fpath))

    if ignore_nodes is None:
        ignore_nodes = {}
    else:
        ignore_nodes = {k: True for k in ignore_nodes}

    # Clamp initializer (constant) tensors; skip variables and ignored names.
    for tensor in graph.tensors().values():
        if tensor.name in ignore_nodes or isinstance(
                tensor, gs.ir.tensor.Variable):
            continue
        np.clip(tensor.values, min, max, out=tensor.values)

    # Clamp "value" attributes (e.g. on Constant nodes).
    for node in graph.nodes:
        node_attr = node.attrs.get("value", None)
        if node.name in ignore_nodes:
            continue
        if node_attr is not None:
            np.clip(node_attr.values, min, max, out=node_attr.values)

    model = gs.export_onnx(graph)
    onnx.save(model, onnx_output_fpath)
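# Usage sketch: a common use is clamping weights into the FP16 representable
# range before building an FP16 TensorRT engine. The file names below are
# placeholders:
# clamp_weights_onnx("model.onnx", "model_clamped.onnx", min=-65504.0, max=65504.0)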
def do_graph_surgeon(self):
    print("Getting new node params")
    for i, node in enumerate(self.graph.nodes):
        self._simplify_node(i, node)

    for json_dict in self.req_json_dicts:
        print(json_dict)
        if 'add_node_req' in json_dict:
            print("Got add_node_request")
            self._add_node(json_dict['add_node_req'])
        elif "delete_node_req" in json_dict:
            print("Got delete_node_request")
            self._delete_node(json_dict['delete_node_req'])
        elif "change_node_req" in json_dict:
            print("Got change_node_request")
            self._change_node(json_dict['change_node_req'])
        else:
            raise SystemExit(
                'ERROR: The requested surgery is not supported.')

    self.onnx_model_fixed = gs.export_onnx(self.graph)
    # onnx.save() returns None, so do not assign its result.
    onnx.save(self.onnx_model_fixed, self.onnx_model_fixed_file)
    print('Fixed ONNX Graph model has been saved to : ',
          self.onnx_model_fixed_file)
def post_process_packnet(model_file, opset=11):
    """
    Use ONNX GraphSurgeon to replace upsample and instance normalization nodes.
    Refer to post_processing.py for details.

    Args:
        model_file: Path to the ONNX file.
        opset: ONNX opset version the model was exported with.
    """
    # Load the packnet graph.
    graph = gs.import_onnx(onnx.load(model_file))

    if opset == 11:
        graph = process_pad_nodes(graph)

    # Replace the upsample subgraph with a single node taking the input and scale factor.
    graph = process_upsample_nodes(graph, opset)

    # Convert the group normalization subgraph into a single plugin node.
    graph = process_groupnorm_nodes(graph)

    # Remove unused nodes, and topologically sort the graph.
    graph.cleanup().toposort()

    # Export the ONNX graph from GraphSurgeon.
    onnx.save_model(gs.export_onnx(graph), model_file)

    print("Saving the ONNX model to {}".format(model_file))
def infer(self):
    """
    Sanitize the graph by cleaning any unconnected nodes, do a topological
    resort, and fold constant inputs values. When possible, run shape
    inference on the ONNX graph to determine tensor shapes.
    """
    for i in range(3):
        count_before = len(self.graph.nodes)

        self.graph.cleanup().toposort()
        try:
            for node in self.graph.nodes:
                for o in node.outputs:
                    o.shape = None
            model = gs.export_onnx(self.graph)
            model = shape_inference.infer_shapes(model)
            self.graph = gs.import_onnx(model)
        except Exception as e:
            log.info(
                "Shape inference could not be performed at this time:\n{}".
                format(e))
        try:
            self.graph.fold_constants(fold_shapes=True)
        except TypeError as e:
            log.error(
                "This version of ONNX GraphSurgeon does not support folding shapes, "
                "please upgrade your onnx_graphsurgeon module. Error:\n{}".format(e))
            raise

        count_after = len(self.graph.nodes)
        if count_before == count_after:
            # No new folding occurred in this iteration, so we can stop for now.
            break
def modify_onnx(onnx_model_file):
    graph = gs.import_onnx(onnx.load(onnx_model_file))
    assert graph is not None

    # Serialize each GridSampler node's parameters into a raw byte buffer and
    # re-tag the node as TRT_PluginV2 so TensorRT can map it to the plugin.
    for node in graph.nodes:
        if node.op == 'GridSampler':
            _, c, h, w = node.inputs[0].shape
            _, h_g, w_g, _ = node.inputs[1].shape
            align_corners = node.attrs['aligncorners']
            inter_mode = node.attrs['interpolationmode']
            pad_mode = node.attrs['paddingmode']
            m_type = 0 if node.inputs[0].dtype == np.float32 else 1
            # Note: np.bool is removed in recent NumPy; use np.bool_ instead.
            buffer = np.array([c, h, w, h_g, w_g], dtype=np.int64).tobytes('C') \
                + np.array([inter_mode, pad_mode], dtype=np.int32).tobytes('C') \
                + np.array([align_corners], dtype=np.bool_).tobytes('C') \
                + np.array([m_type], dtype=np.int32).tobytes('C')
            node.attrs = {
                'name': 'GridSampler',
                'version': '1',
                'namespace': "",
                'data': buffer
            }
            node.op = 'TRT_PluginV2'

    onnx.save(gs.export_onnx(graph), onnx_model_file)
def export_graph(self, graph, args, do_type_check=True):
    if not args.no_cleanup:
        graph.cleanup()
    if not args.no_toposort:
        graph.toposort()

    G_LOGGER.info(
        "Writing model to: {output}. To see more details about the model, "
        "use: polygraphy inspect model {output} --mode=basic".format(
            output=args.output))
    onnx.save(gs.export_onnx(graph, do_type_check=do_type_check), args.output)
def modify_model2(cls, input_file="model.onnx", output_file="add.onnx"):
    """Re-implement the Resize node."""
    graph = gs.import_onnx(onnx.load(input_file))

    # Find the first LeakyRelu node.
    first_add = [node for node in graph.nodes if node.op == "LeakyRelu"][0]
    # first_add = [node for node in graph.nodes if node.name == "LeakyRelu_2"][0]  # find it by name instead
    # first_add.inputs = [inp for inp in first_add.inputs]    # its inputs
    # first_add.outputs = [inp for inp in first_add.outputs]  # its outputs

    # Must be executed: clear() removes the output links, but it also leaves the
    # LeakyRelu with no output, so a new output has to be created below.
    first_add.outputs.clear()
    # graph.nodes.remove(first_add)  # remove the whole node

    second_add = [node for node in graph.nodes if node.op == "MaxPool"][0]
    # second_add = [node for node in graph.nodes if node.name == "MaxPool_32"][0]
    # Must be executed: clear() removes the input links; the inputs are re-specified below.
    second_add.inputs.clear()

    # Redefine the LeakyRelu layer.
    attrs = {"alpha": 0.1}
    lrelu = gs.Variable("new_lrelu", np.float32)
    node = gs.Node(op="LeakyRelu",
                   inputs=first_add.inputs,
                   outputs=[lrelu],
                   attrs=attrs)
    graph.nodes.append(node)

    # Redefine the Resize layer (implements upsampling).
    attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    layer_name = "new_resize"  # must not clash with the original Resize node name
    scales = np.array([1.0, 1.0, 2, 2]).astype(np.float32)
    scale_name = layer_name + ".scale"
    roi_name = layer_name + ".roi"
    scale = gs.Constant(scale_name, scales)
    roi = gs.Constant(roi_name, np.asarray([0, 0, 0, 0], np.float32))
    # inputs = first_add.outputs
    inputs = [lrelu]
    inputs.append(roi)
    inputs.append(scale)
    resize = gs.Variable(layer_name, dtype=np.float32)
    node = gs.Node(op="Resize", inputs=inputs, outputs=[resize], attrs=attrs)
    graph.nodes.append(node)

    # Reset the next layer's input.
    second_add.inputs = [resize]

    # Remove unused nodes/tensors, and topologically sort the graph.
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
def save_model(self):
    # Note that initializers do not necessarily have to be graph inputs.
    graph = gs.Graph(nodes=self.node, inputs=self.inputs, outputs=self.outputs)
    # print(onnx.helper.printable_graph(graph))
    onnx.save(gs.export_onnx(graph), self.output_file_path)

    # Verify that the saved .onnx file is well-formed.
    onnx_model = onnx.load(self.output_file_path)
    onnx.checker.check_model(onnx_model)
def run(nM, nK, nN):
    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])
    constant1xK = gs.Constant(
        "constant1xK",
        np.ascontiguousarray(
            np.random.rand(1, nK).reshape(1, nK).astype(np.float32) * 2 - 1))
    constantKxN = gs.Constant(
        "constantKxN",
        np.ascontiguousarray(
            np.random.rand(nK, nN).reshape(nK, nN).astype(np.float32) * 2 - 1))
    constantN = gs.Constant(
        "constantN",
        np.ascontiguousarray(np.random.rand(nN).astype(np.float32) * 2 - 1))
    constantNxK = gs.Constant(
        "constantNxK",
        np.ascontiguousarray(
            np.random.rand(nN, nK).reshape(nN, nK).astype(np.float32) * 2 - 1))
    constantK = gs.Constant(
        "constantK",
        np.ascontiguousarray(np.random.rand(nK).astype(np.float32) * 2 - 1))
    constantM1 = gs.Constant(
        "constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    tensor1 = gs.Variable("tensor1", np.float32, None)
    node1 = gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[tensor1])
    graphNodeList.append(node1)

    tensorLoop = tensor1
    for i in range(nLoop):  # nLoop is assumed to be a module-level constant
        tensor2 = gs.Variable("tensor%d-1" % i, np.float32, None)
        node2 = gs.Node("MatMul", "MMU-" + str(i), inputs=[tensorLoop, constantKxN], outputs=[tensor2])
        graphNodeList.append(node2)

        tensor3 = gs.Variable("tensor%d-2" % i, dtype=np.float32, shape=None)
        node3 = gs.Node("Add", "AddU-" + str(i), inputs=[tensor2, constantN], outputs=[tensor3])
        graphNodeList.append(node3)

        tensor4 = gs.Variable("tensor%d-3" % i, dtype=np.float32, shape=None)
        node4 = gs.Node("Relu", "ReLUU-" + str(i), inputs=[tensor3], outputs=[tensor4])
        graphNodeList.append(node4)

        tensor5 = gs.Variable("tensor%d-4" % i, dtype=np.float32, shape=None)
        node5 = gs.Node("MatMul", "MMD-" + str(i), inputs=[tensor4, constantNxK], outputs=[tensor5])
        graphNodeList.append(node5)

        tensor6 = gs.Variable("tensor%d-5" % i, dtype=np.float32, shape=None)
        node6 = gs.Node("Add", "AddD-" + str(i), inputs=[tensor5, constantK], outputs=[tensor6])
        graphNodeList.append(node6)

        tensor7 = gs.Variable("tensor%d-6" % i, dtype=np.float32, shape=None)
        node7 = gs.Node("Relu", "ReLUD-" + str(i), inputs=[tensor6], outputs=[tensor7])
        graphNodeList.append(node7)

        tensorLoop = tensor7

    tensor8 = gs.Variable("tensor8", dtype=np.float32, shape=None)
    node8 = gs.Node("ReduceSum", "Reduce", inputs=[tensorLoop, constantM1],
                    outputs=[tensor8], attrs=OrderedDict([('keepdims', 0)]))
    graphNodeList.append(node8)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor8], opset=13)

    onnxFile = "model-%d-%d-%d.onnx" % (nM, nK, nN)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile)
    print("Succeeded building %s!" % (onnxFile))

    os.system("trtexec --onnx=%s --useCudaGraph --noDataTransfers --fp16" % onnxFile)
def save(self, output_path):
    """
    Save the ONNX model to the given location.
    :param output_path: Path pointing to the location where to write out the updated ONNX model.
    """
    self.graph.cleanup().toposort()
    model = gs.export_onnx(self.graph)
    output_path = os.path.realpath(output_path)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    onnx.save(model, output_path)
    log.info("Saved ONNX model to {}".format(output_path))
def save(self, output_path):
    """
    Save the ONNX model to the given location.

    Args:
        output_path: Path pointing to the location where to write out the updated ONNX model.
    """
    self.graph.cleanup().toposort()
    model = gs.export_onnx(self.graph)
    onnx.save(model, output_path)
    logger.info(f"Saved ONNX model to {output_path}")
def main():
    parser = argparse.ArgumentParser(description="Add batchedNMSPlugin")
    parser.add_argument("-f", "--model",
                        help="Path to the ONNX model generated by export_model.py",
                        default="yolov4_1_3_416_416.onnx")
    parser.add_argument("-t", "--topK",
                        help="Number of bounding boxes for NMS",
                        default=2000)
    parser.add_argument("-k", "--keepTopK",
                        help="Bounding boxes to be kept per image",
                        default=1000)
    args, _ = parser.parse_known_args()

    graph = gs.import_onnx(onnx.load(args.model))
    graph = create_and_add_plugin_node(graph, int(args.topK), int(args.keepTopK))
    onnx.save(gs.export_onnx(graph), args.model + ".nms.onnx")
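# create_and_add_plugin_node is defined elsewhere in the source script. A
# minimal sketch, assuming the graph's existing outputs are the box and score
# tensors and that TensorRT's BatchedNMS_TRT plugin is the target; the class
# count and thresholds below are illustrative assumptions:
def create_and_add_plugin_node(graph, topK, keepTopK):
    batch_size = graph.inputs[0].shape[0]
    boxes, scores = graph.outputs  # assumed output order: boxes, then scores
    num_detections = gs.Variable("num_detections", np.int32, [batch_size, 1])
    nmsed_boxes = gs.Variable("nmsed_boxes", np.float32, [batch_size, keepTopK, 4])
    nmsed_scores = gs.Variable("nmsed_scores", np.float32, [batch_size, keepTopK])
    nmsed_classes = gs.Variable("nmsed_classes", np.float32, [batch_size, keepTopK])
    nms = gs.Node(
        op="BatchedNMS_TRT",
        inputs=[boxes, scores],
        outputs=[num_detections, nmsed_boxes, nmsed_scores, nmsed_classes],
        attrs={
            "shareLocation": True,
            "backgroundLabelId": -1,
            "numClasses": 80,      # assumption: COCO-style class count
            "topK": topK,
            "keepTopK": keepTopK,
            "scoreThreshold": 0.4,  # assumption
            "iouThreshold": 0.6,    # assumption
            "isNormalized": True,
            "clipBoxes": True,
        })
    graph.nodes.append(nms)
    graph.outputs = nms.outputs
    return graph.cleanup().toposort()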
def resize_model(cls, input_file="model.onnx", output_file="subgraph.onnx"):
    """Modify the model's inputs and outputs (truncate the graph)."""
    model = onnx.load(input_file)
    graph = gs.import_onnx(model)
    # tensors = graph.tensors()

    # Reset the model's inputs and outputs.
    # graph.inputs = [tensors['x'].to_variable(np.float32)]
    # graph.outputs = [tensors['sigmoid'].to_variable(np.float32, shape=(1, 8))]

    # The original output node was named "softmax"; make the last Sigmoid the new output.
    first_add = [node for node in graph.nodes if node.op == "Sigmoid"][-1]
    graph.outputs = [
        first_add.outputs[0].to_variable(np.float32, shape=(1, 8))
    ]

    graph.cleanup()
    onnx.save(gs.export_onnx(graph), output_file)
def main():
    parser = argparse.ArgumentParser(
        description="Insert DCNv2 plugin node into ONNX model")
    parser.add_argument(
        "-i", "--input",
        help="Path to ONNX model with 'Plugin' node to replace with DCNv2_TRT",
        default="models/centertrack_DCNv2_named.onnx")
    parser.add_argument(
        "-o", "--output",
        help="Path to output ONNX model with 'DCNv2_TRT' node",
        default="models/modified.onnx")
    args, _ = parser.parse_known_args()

    graph = gs.import_onnx(onnx.load(args.input))
    graph = process_graph(graph)
    onnx.save(gs.export_onnx(graph), args.output)
def main():
    org_model_file_path = getFilePath(
        'samples/python/engine_refit_onnx_bidaf/bidaf-original.onnx')

    print("Modifying the ONNX model ...")
    original_model = onnx.load(org_model_file_path)
    graph = gs.import_onnx(original_model)

    drop_category_mapper_nodes(graph)
    replace_unsupported_ops(graph)
    save_weights_for_refitting(graph)

    new_model = gs.export_onnx(graph)

    modified_model_name = "bidaf-modified.onnx"
    onnx.checker.check_model(new_model)
    onnx.save(new_model, modified_model_name)
    print("Modified ONNX model saved as {}".format(modified_model_name))
    print("Done.")
def remove_model(cls, input_file="model.onnx", output_file="removed.onnx"):
    """Delete a node: remove the Sigmoid node."""
    graph = gs.import_onnx(onnx.load(input_file))

    # Find the last Sigmoid node.
    first_add = [node for node in graph.nodes if node.op == "Sigmoid"][-1]
    # first_add.inputs = [inp for inp in first_add.inputs if inp.name == "fc"]  # keep only the input named "fc"
    # first_add.inputs = [inp for inp in first_add.inputs]                      # its inputs
    first_add.outputs.clear()

    # Find the next node and rewire its input.
    next_add = [node for node in graph.nodes if node.op == "Softmax"][-1]  # the last Softmax node
    next_add.inputs.clear()  # clear first, then re-specify
    next_add.inputs = first_add.inputs  # reconnect to the Sigmoid's former inputs

    # Remove the detached node from the graph completely.
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
"python3 -m tf2onnx.convert --opset 11 --input %s --output %s --inputs 'input_0:0' --outputs '%s:0' --inputs-as-nchw 'x:0'" % (pb2File, onnxFile, outputNodeName)) print("Succeeded converting model into onnx!") # 优化 .onnx 文件,去除 Conv 前的 Transpose 节点 -------------------------------- graph = gs.import_onnx(onnx.load(onnxFile)) # 原 repo 中解释,导出的计算图中 Conv 的 Weight 输入前会有一个 Transpose 节点,并且 TensorRT QAT 模式不支持这个节点,这里用于手工转置并去除该 Transpose 节点 # 但是在目前导出的计算图中已经没有了这个节点,不再需要这一步 if isRemoveTransposeNode: for node in [n for n in graph.nodes if n.op == "Conv"]: convKernelTensor = node.i(1).i().i().inputs[0] convKernelTensor.values = convKernelTensor.values.transpose(3, 2, 0, 1) node.inputs[1] = node.i(1).i(0).outputs[0] onnx.save_model(gs.export_onnx(graph.cleanup().toposort()), onnx2File) print("Succeeded optimizing .onnx in Onnx!") # TensorRT 中加载 .onnx 创建 engine --------------------------------------------- logger = trt.Logger(trt.Logger.ERROR) builder = trt.Builder(logger) networkFlag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) | ( 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)) network = builder.create_network(networkFlag) profile = builder.create_optimization_profile() config = builder.create_builder_config() config.flags = 1 << int(trt.BuilderFlag.INT8) config.max_workspace_size = 3 << 30 parser = trt.OnnxParser(network, logger) if not os.path.exists(onnxFile): print("Failed finding .onnx file!")
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

X = gs.Variable(name="X", dtype=np.float32, shape=(1, 3, 224, 224))
# Since W is a Constant, it will automatically be exported as an initializer
W = gs.Constant(name="W", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32))
Y = gs.Variable(name="Y", dtype=np.float32, shape=(1, 5, 222, 222))

node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])

# Note that initializers do not necessarily have to be graph inputs
graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
onnx.save(gs.export_onnx(graph), "test_conv.onnx")
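# A quick sanity check of the generated file (a usage sketch; onnx.checker
# and onnx.helper.printable_graph are part of the onnx package):
check_model = onnx.load("test_conv.onnx")
onnx.checker.check_model(check_model)
print(onnx.helper.printable_graph(check_model.graph))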
        conv_weights_transposed = np.transpose(conv_weights_tensor.values, [3, 2, 0, 1])
        conv_weights_tensor.values = conv_weights_transposed

        # Remove the transpose nodes after the dequant node.
        # TensorRT does not support transpose nodes after QDQ nodes.
        dequant_node_output = node.i(1).i(0).outputs[0]
        node.inputs[1] = dequant_node_output

    # Remove unused nodes, and topologically sort the graph.
    return graph.cleanup().toposort()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        "Post process ONNX graph by removing transpose nodes")
    parser.add_argument("--input", required=True, help="Input onnx graph")
    parser.add_argument("--output", default='postprocessed_rn50.onnx',
                        help="Name of post processed onnx graph")
    args = parser.parse_args()

    # Load the rn50 graph.
    graph = gs.import_onnx(onnx.load(args.input))

    # Remove the transpose nodes and reshape the convolution weights.
    graph = process_transpose_nodes(graph)

    # Export the ONNX graph from graphsurgeon.
    onnx_model = gs.export_onnx(graph)
    print("Output ONNX graph generated: ", args.output)
    onnx.save_model(onnx_model, args.output)
def main():
    tf.set_random_seed(1234)
    np.random.seed(0)
    iterations = 100

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        batch_size = 16
        input_data = np.random.rand(batch_size, 256).astype(np.float32)
        input_ph = tf.placeholder(dtype=tf.float32, shape=[batch_size, 256], name="input")
        x = tf.layers.dense(input_ph, 256)

        # Test one_hot.
        depth = 256
        indices = tf.cast(tf.clip_by_value(tf.reshape(x, [-1]), 0, depth - 1), tf.int32)
        x = tf.one_hot(indices, depth)
        x = tf.reshape(x, [batch_size, -1])

        x = tf.layers.dense(x, 256)
        output = tf.identity(x, name="output")
        sess.run(tf.global_variables_initializer())

        time_sum = 0
        a = datetime.now()
        for i in range(iterations):
            tf_result = sess.run([output], {input_ph: input_data})
        b = datetime.now()
        time_sum = (b - a).total_seconds()
        tf_time = "[INFO] TF execution time " + str(time_sum * 1000 / iterations) + " ms"
        print(tf_time)

        output_name_without_port = ["output"]
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_name_without_port)
        # Save the frozen model.
        with open("test_op.pb", "wb") as ofile:
            ofile.write(frozen_graph.SerializeToString())

        model_file = "test_op.onnx"
        os.system(
            "python3 -m tf2onnx.convert --input test_op.pb --inputs input:0 --outputs output:0 --output test_op.onnx --verbose --opset 11"
        )

        ### Use ONNX GraphSurgeon.
        # The ONNX operator is required to stay aligned (name, inputs, outputs
        # and attributes) with the TensorRT plugin to use the fallback mechanism.
        # ONNX GraphSurgeon is useful for such modifications; install it with:
        #   pip install nvidia-pyindex
        #   pip install onnx-graphsurgeon
        graph = gs.import_onnx(onnx.load(model_file))
        graph = modify_onehot(graph)
        model_file = "test_op_onehot.onnx"
        onnx.save(gs.export_onnx(graph), model_file)

        # Build the TRT engine from the ONNX model.
        cuda.Device(0).make_context()
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
                1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_batch_size = batch_size
            with open(model_file, 'rb') as model:
                # Parse the ONNX model.
                parser.parse(model.read())
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
            engine = builder.build_engine(network, builder.create_builder_config())
            if engine is None:
                print("[ERROR] engine is None")
                exit(-1)

            inputs, outputs, bindings, stream = allocate_buffers(engine)
            with engine.create_execution_context() as context:
                input_data = input_data.ravel()
                np.copyto(inputs[0].host, input_data)

                time_sum = 0
                a = datetime.now()
                for i in range(iterations):
                    np.copyto(inputs[0].host, input_data)
                    output = do_inference(context, bindings=bindings, inputs=inputs,
                                          outputs=outputs, stream=stream,
                                          batch_size=batch_size)
                b = datetime.now()
                time_sum = (b - a).total_seconds()
                trt_time = ("TRT execution time " + str(time_sum * 1000 / iterations) + " ms")
                trt_result = output

        for i in range(len(trt_result)):
            print("trt cross_check output_%d " % i +
                  str(np.allclose(tf_result[i].flatten(), trt_result[i], atol=1e-5)))
            print("max diff " + str(np.fabs(tf_result[i].flatten() - trt_result[i]).max()))
            print("min diff " + str(np.fabs(tf_result[i].flatten() - trt_result[i]).min()))

        print(tf_time)
        print(trt_time)

        cuda.Context.pop()
        node_cast1 = node.i(2, 0)
        # Change the data type to fp32.
        node_cast0.i(0, 0).attrs['value'] = gs.Constant(
            '', np.asarray([-1.0], dtype=np.float32))
        node_cast1.i(0, 0).attrs['value'] = gs.Constant(
            '', np.asarray([1.0], dtype=np.float32))
        # Skip the casts.
        node.inputs = [
            node.inputs[0], node_cast0.inputs[0], node_cast1.inputs[0]
        ]
        # Clean up the casts.
        node_cast0.outputs.clear()
        node_cast1.outputs.clear()
    if node.op == 'grid_sampler':
        # Clean up 3 unused inputs.
        for i in [4, 3, 2]:
            node.i(i, 0).outputs.clear()
            del node.inputs[i]

graph.cleanup()
onnx.save(gs.export_onnx(graph), dst_onnx)

model = onnx.load(dst_onnx)
# May not work with non-standard ONNX ops:
# onnx.checker.check_model(model)
# print(onnx.helper.printable_graph(model.graph))

# trtexec --verbose --onnx=custom_surgeon.onnx --saveEngine=custom_surgeon.trt --plugins=./GridSamplerPlugin.so
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import onnx

print("Graph.fold_constants Help:\n{}".format(gs.Graph.fold_constants.__doc__))

graph = gs.import_onnx(onnx.load("model.onnx"))

# Fold constants in the graph using ONNX Runtime. This will replace
# expressions that can be evaluated prior to runtime with constant tensors.
# The `fold_constants()` function will not, however, remove the nodes that
# it replaced - it simply changes the inputs of subsequent nodes.
# To remove these unused nodes, we can follow up `fold_constants()` with `cleanup()`.
graph.fold_constants().cleanup()

onnx.save(gs.export_onnx(graph), "folded.onnx")
        tensorLoop = tensor6

    tensor7 = gs.Variable("tensor-6", dtype=np.float32, shape=None)
    node7 = gs.Node("Conv", "Conv1", inputs=[tensorLoop, constant1x32], outputs=[tensor7])
    graphNodeList.append(node7)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor7], opset=13)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile0)
    print("Succeeded building %s!" % (onnxFile0))

# Modify the .onnx file.
graph = gs.import_onnx(onnx.load(onnxFile0))

constant32r = gs.Constant(
    "constant32r",
    np.ascontiguousarray(
        np.random.rand(1, nC, 1, 1).reshape(1, nC, 1, 1).astype(np.float32) * 2 - 1))

for node in graph.nodes:
    if node.op in ['Unsqueeze', 'Squeeze']:
        node.o().inputs[0] = node.inputs[0]
import onnx_graphsurgeon as gs  # missing import added for the gs calls below
import numpy as np
import onnx

# Computes outputs = input + ((a + b) + d)

shape = (1, 3)

# Inputs
input = gs.Variable("input", shape=shape, dtype=np.float32)

# Intermediate tensors
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
c = gs.Variable("c")
d = gs.Constant("d", values=np.ones(shape=shape, dtype=np.float32))
e = gs.Variable("e")

# Outputs
output = gs.Variable("output", shape=shape, dtype=np.float32)

nodes = [
    # c = (a + b)
    gs.Node("Add", inputs=[a, b], outputs=[c]),
    # e = (c + d)
    gs.Node("Add", inputs=[c, d], outputs=[e]),
    # output = input + e
    gs.Node("Add", inputs=[input, e], outputs=[output]),
]

graph = gs.Graph(nodes=nodes, inputs=[input], outputs=[output])
onnx.save(gs.export_onnx(graph), "model.onnx")
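# For reference, running fold_constants (as in the fold_constants example
# earlier) on this graph evaluates c = a + b and e = c + d with ONNX Runtime
# and replaces them with constant tensors; cleanup() then removes the two
# now-dead Add nodes, leaving only "output = input + e". A sketch, assuming
# onnxruntime is installed:
graph.fold_constants().cleanup()
assert len(graph.nodes) == 1  # only the final Add remains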
            inputs=[
                constantData,     # data
                wiliConstant0,    # start = 0
                graph.inputs[0],  # end
                wiliConstant3,    # axes = 3
                wiliConstant1,    # step = 1
            ],
            outputs=[sliceV])
        graph.nodes.append(sliceN)
        graph.outputs.append(sliceV)
        nSlice += 1
        tansposeNode.outputs = []
        continue

graph.cleanup()
onnx.save(gs.export_onnx(graph), onnxFile1)


def run(onnxFile):
    logger = trt.Logger(trt.Logger.VERBOSE)
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    profile = builder.create_optimization_profile()
    config = builder.create_builder_config()
    config.max_workspace_size = 22 << 30
    parser = trt.OnnxParser(network, logger)
    with open(onnxFile, 'rb') as model:
        parser.parse(model.read())