import os
from collections import OrderedDict

import numpy as np
import onnx
import onnx_graphsurgeon as gs


def modify_model2(cls, input_file="model.onnx", output_file="add.onnx"):
    """Re-implement the Resize (upsample) node of the model."""
    graph = gs.import_onnx(onnx.load(input_file))

    # Locate the LeakyRelu node (alternatively, match by name, e.g. "LeakyRelu_2")
    first_add = [node for node in graph.nodes if node.op == "LeakyRelu"][0]
    # clear() is required: it detaches the output links, but this also leaves the
    # LeakyRelu without an output, so a replacement must be created below.
    first_add.outputs.clear()
    # graph.nodes.remove(first_add)  # would delete the whole node instead

    second_add = [node for node in graph.nodes if node.op == "MaxPool"][0]
    # second_add = [node for node in graph.nodes if node.name == "MaxPool_32"][0]
    # clear() is required: it detaches the input links; a new input is assigned below.
    second_add.inputs.clear()

    # Re-create the LeakyRelu layer
    attrs = {"alpha": 0.1}
    lrelu = gs.Variable("new_lrelu", np.float32)
    node = gs.Node(op="LeakyRelu", inputs=first_add.inputs, outputs=[lrelu], attrs=attrs)
    graph.nodes.append(node)

    # Re-create the Resize layer (implements the upsample)
    attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    layer_name = "new_resize"  # must not collide with the original Resize node's name
    scales = np.array([1.0, 1.0, 2, 2]).astype(np.float32)
    scale = gs.Constant(layer_name + ".scale", scales)
    roi = gs.Constant(layer_name + ".roi", np.asarray([0, 0, 0, 0], np.float32))
    inputs = [lrelu, roi, scale]  # ONNX Resize expects inputs in the order [X, roi, scales]
    resize = gs.Variable(layer_name, dtype=np.float32)
    node = gs.Node(op="Resize", inputs=inputs, outputs=[resize], attrs=attrs)
    graph.nodes.append(node)

    # Re-wire the next layer's input
    second_add.inputs = [resize]

    # Remove unused nodes/tensors, and topologically sort the graph
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
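# The core pattern above is: clear() the stale links, build replacement nodes, then
# cleanup() to drop the now-dangling originals. A minimal self-contained sketch of
# that pattern on a toy graph (all names here are illustrative, not from the model):
def _demo_clear_and_rewire():
    x = gs.Variable("x", dtype=np.float32, shape=(1, 3, 8, 8))
    y = gs.Variable("y", dtype=np.float32, shape=(1, 3, 8, 8))
    relu = gs.Node(op="Relu", inputs=[x], outputs=[y])
    graph = gs.Graph(nodes=[relu], inputs=[x], outputs=[y])

    relu.outputs.clear()  # detach Relu from y
    z = gs.Variable("z", dtype=np.float32)
    graph.nodes.append(gs.Node(op="LeakyRelu", inputs=[x], outputs=[z], attrs={"alpha": 0.1}))
    graph.outputs = [z]

    graph.cleanup().toposort()  # the detached Relu is removed here
    return graph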
def add_node(self, graph):
    print("Start adding node (op = %s)" % (self.plugin_name))
    batch_size = graph.inputs[0].shape[0]
    input_h = graph.inputs[0].shape[2]  # NCHW: dim 2 is height
    input_w = graph.inputs[0].shape[3]  # NCHW: dim 3 is width
    print("height %d, width %d" % (input_h, input_w))
    print("target Plugin: %s" % (self.plugin_name))

    tensors = graph.tensors()
    boxes_tensor = tensors['boxes']
    confs_tensor = tensors['confs']

    num_detections = gs.Variable(name="num_detections").to_variable(dtype=np.int32, shape=[batch_size, 1])
    nms_boxes = gs.Variable(name="nms_boxes").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK, 4])
    nms_scores = gs.Variable(name="nms_scores").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK])
    nms_classes = gs.Variable(name="nms_classes").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK])
    outputs = [num_detections, nms_boxes, nms_scores, nms_classes]

    nms_node = gs.Node(op=self.plugin_name,
                       attrs=self.attrs,
                       inputs=[boxes_tensor, confs_tensor],
                       outputs=outputs)

    graph.nodes.append(nms_node)
    graph.outputs = outputs
    print("ADD graph node surgery complete")
    return graph.cleanup().toposort()
def convert_to_groupnorm(instancenorm, graph):
    """
    Convert the PyTorch-exported GroupNorm subgraph to the subgraph below:

        Conv
         |
        GroupNorm
         |
        ReLU

    Attributes:
        instancenorm: Instance Normalization node in the graph.
        graph: Input graph object.
    """
    # Retrieve the instancenorm attributes and create the replacement node
    attrs = retrieve_attrs(instancenorm)  # external helper (defined elsewhere) that extracts the plugin attributes
    groupnorm = gs.Node(op="GroupNormalizationPlugin", attrs=attrs)
    graph.nodes.append(groupnorm)

    # The plugin needs to receive an input from the Conv node, and output to the ReLU node
    conv_output_tensor = instancenorm.i().inputs[0]  # Output of Conv
    relu_input_tensor = instancenorm.o().o().o().outputs[0]  # Output of Add

    # Reconnect inputs/outputs to the groupnorm plugin
    conv_output_tensor.outputs[0] = groupnorm
    relu_input_tensor.inputs[0] = groupnorm

    # Add scale and bias constant tensors from the Unsqueeze ops as inputs to the plugin
    groupnorm.inputs.append(instancenorm.o().o().i(1).inputs[0])
    groupnorm.inputs.append(instancenorm.o().o().o().i(1).inputs[0])
def create_and_add_plugin_node(graph, topK, keepTopK):
    batch_size = graph.inputs[0].shape[0]
    input_h = graph.inputs[0].shape[2]
    input_w = graph.inputs[0].shape[3]

    tensors = graph.tensors()
    boxes_tensor = tensors["boxes"]
    confs_tensor = tensors["confs"]

    num_detections = gs.Variable(name="num_detections").to_variable(dtype=np.int32, shape=[batch_size, 1])
    nmsed_boxes = gs.Variable(name="nmsed_boxes").to_variable(dtype=np.float32, shape=[batch_size, keepTopK, 4])
    nmsed_scores = gs.Variable(name="nmsed_scores").to_variable(dtype=np.float32, shape=[batch_size, keepTopK])
    nmsed_classes = gs.Variable(name="nmsed_classes").to_variable(dtype=np.float32, shape=[batch_size, keepTopK])
    new_outputs = [num_detections, nmsed_boxes, nmsed_scores, nmsed_classes]

    nms_node = gs.Node(
        op="BatchedNMS_TRT",
        attrs=create_attrs(input_h, input_w, topK, keepTopK),  # external helper (defined elsewhere) building the plugin attributes
        inputs=[boxes_tensor, confs_tensor],
        outputs=new_outputs)

    graph.nodes.append(nms_node)
    graph.outputs = new_outputs

    return graph.cleanup().toposort()
def layer_upsample(self, layer_name, input_node, output_shape, resize_scale_factors=2):
    attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    inputs = [input_node]
    scales = np.array([1.0, 1.0, resize_scale_factors, resize_scale_factors]).astype(np.float32)
    scale_name = layer_name + ".scale"
    roi_name = layer_name + ".roi"
    scale = self.layer_constant(scale_name, scales)
    roi = self.layer_constant(roi_name, np.asarray([0, 0, 0, 0], np.float32))
    inputs.append(roi)
    inputs.append(scale)  # ONNX Resize expects inputs in the order [X, roi, scales]
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Resize", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
def onnx_mul(nodes, layer_name, input_node1, input_node2, output_shape):
    inputs = [input_node1, input_node2]
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Mul", inputs=inputs, outputs=[output_node])
    nodes.append(node)
    return output_node
def onnx_upsample(nodes, layer_name, input_node, output_shape=None, resize_scale_factors=2):
    attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    # layer_name must not collide with the original Resize node's name
    scales = np.array([1.0, 1.0, resize_scale_factors, resize_scale_factors]).astype(np.float32)
    scale = gs.Constant(layer_name + ".scale", scales)
    roi = gs.Constant(layer_name + ".roi", np.asarray([0, 0, 0, 0], np.float32))
    inputs = [input_node, roi, scale]  # ONNX Resize expects inputs in the order [X, roi, scales]
    output_node = gs.Variable(layer_name, dtype=np.float32, shape=output_shape)
    node = gs.Node(op="Resize", inputs=inputs, outputs=[output_node], attrs=attrs)
    nodes.append(node)
    return output_node
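# A minimal sketch of driving onnx_upsample(): build a 2x nearest-neighbor upsample
# graph and export it (the shapes and file name here are illustrative).
def _demo_upsample():
    nodes = []
    x = gs.Variable("x", np.float32, (1, 3, 16, 16))
    y = onnx_upsample(nodes, "up1", x, output_shape=(1, 3, 32, 32), resize_scale_factors=2)
    graph = gs.Graph(nodes=nodes, inputs=[x], outputs=[y], opset=13)
    onnx.save(gs.export_onnx(graph), "upsample_demo.onnx")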
def add_model(cls, input_file="model.onnx", output_file="add.onnx"):
    """Add a node: insert a LeakyRelu in front of the Sigmoid."""
    graph = gs.import_onnx(onnx.load(input_file))

    # Find the last Sigmoid node
    first_add = [node for node in graph.nodes if node.op == "Sigmoid"][-1]
    # Input-filtering variants, e.g. to keep only 'fc' or to drop the input named 'b':
    # first_add.inputs = [inp for inp in first_add.inputs if inp.name == "fc"]
    # first_add.inputs = [inp for inp in first_add.inputs if inp.name != "b"]

    # Insert a LeakyRelu that consumes the Sigmoid's current inputs
    lrelu = gs.Variable('new_lrelu', dtype=np.float32)
    graph.nodes.append(
        gs.Node(op="LeakyRelu", inputs=first_add.inputs, outputs=[lrelu], attrs={"alpha": 0.02}))

    # The Sigmoid's input now becomes the LeakyRelu's output
    first_add.inputs.clear()
    first_add.inputs = [lrelu]

    # Remove unused nodes/tensors, and topologically sort the graph
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), output_file)
def onnx_slice(nodes, layer_name, input_node, output_shape, start=(0, 0, 0, 0), shape=(2, 2, 3, 3), stride=(1, 1, 1, 1)):
    """Emit an ONNX Slice node.

    Note: `shape` is passed as the Slice op's `ends` input (end indices, not sizes).

    Example:
        x = torch.randn([8, 8])
        x[:, 2:4]  ==  onnx_slice(nodes, "slice", x, (8, 2), start=(0, 2), shape=(8, 4), stride=(1, 1))
    """
    inputs = [input_node]
    inputs.extend([
        gs.Constant(layer_name + '_constant_start', np.asarray(start, np.int32)),
        gs.Constant(layer_name + '_constant_shape', np.asarray(shape, np.int32)),  # the 'ends' input
        gs.Constant(layer_name + '_constant_axis', np.arange(0, len(start)).astype(np.int32)),
        gs.Constant(layer_name + '_constant_stride', np.asarray(stride, np.int32)),
    ])
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Slice", inputs=inputs, outputs=[output_node])
    nodes.append(node)
    return output_node
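# Reproducing the docstring example as a runnable sketch: for x of shape (8, 8),
# x[:, 2:4] becomes starts=(0, 2), ends=(8, 4), steps=(1, 1).
def _demo_slice():
    nodes = []
    x = gs.Variable("x", np.float32, (8, 8))
    y = onnx_slice(nodes, "slice", x, (8, 2), start=(0, 2), shape=(8, 4), stride=(1, 1))
    return gs.Graph(nodes=nodes, inputs=[x], outputs=[y], opset=13)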
def layer_conv(self, layer_name, input_node, output_shape, kernel_shape=(3, 3), strides=(1, 1), pads=(1, 1, 1, 1)):
    attrs = {
        'group': 1,
        'dilations': [1, 1],
        'kernel_shape': kernel_shape,
        'strides': strides,
        'pads': pads,
        # "auto_pad": 'SAME_LOWER',
    }
    inputs = [input_node]
    weights_name = layer_name + ".weight"
    W = self.layer_constant(weights_name, self.weights[weights_name])
    inputs.append(W)
    bias_name = layer_name + ".bias"
    if bias_name in self.weights:
        b = self.layer_constant(bias_name, self.weights[bias_name])
        inputs.append(b)
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Conv", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
def layer_exp(self, layer_name, input_node, output_shape):
    inputs = [input_node]
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Exp", inputs=inputs, outputs=[output_node])
    self.node.append(node)
    return output_node
def layer_mul(self, layer_name, input_node1, input_node2, output_shape):
    inputs = [input_node1, input_node2]
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Mul", inputs=inputs, outputs=[output_node])
    self.node.append(node)
    return output_node
def onnx_exp(nodes, layer_name, input_node, output_shape):
    inputs = [input_node]
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Exp", inputs=inputs, outputs=[output_node])
    nodes.append(node)
    return output_node
def run(nM, nK, nN):
    # nLoop (the number of stacked MLP blocks) is expected to be defined at module scope.
    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])
    constant1xK = gs.Constant("constant1xK", np.ascontiguousarray(np.random.rand(1, nK).reshape(1, nK).astype(np.float32) * 2 - 1))
    constantKxN = gs.Constant("constantKxN", np.ascontiguousarray(np.random.rand(nK, nN).reshape(nK, nN).astype(np.float32) * 2 - 1))
    constantN = gs.Constant("constantN", np.ascontiguousarray(np.random.rand(nN).astype(np.float32) * 2 - 1))
    constantNxK = gs.Constant("constantNxK", np.ascontiguousarray(np.random.rand(nN, nK).reshape(nN, nK).astype(np.float32) * 2 - 1))
    constantK = gs.Constant("constantK", np.ascontiguousarray(np.random.rand(nK).astype(np.float32) * 2 - 1))
    constantM1 = gs.Constant("constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    tensor1 = gs.Variable("tensor1", np.float32, None)
    node1 = gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[tensor1])
    graphNodeList.append(node1)

    tensorLoop = tensor1
    for i in range(nLoop):
        tensor2 = gs.Variable("tensor%d-1" % i, np.float32, None)
        node2 = gs.Node("MatMul", "MMU-" + str(i), inputs=[tensorLoop, constantKxN], outputs=[tensor2])
        graphNodeList.append(node2)

        tensor3 = gs.Variable("tensor%d-2" % i, dtype=np.float32, shape=None)
        node3 = gs.Node("Add", "AddU-" + str(i), inputs=[tensor2, constantN], outputs=[tensor3])
        graphNodeList.append(node3)

        tensor4 = gs.Variable("tensor%d-3" % i, dtype=np.float32, shape=None)
        node4 = gs.Node("Relu", "ReLUU-" + str(i), inputs=[tensor3], outputs=[tensor4])
        graphNodeList.append(node4)

        tensor5 = gs.Variable("tensor%d-4" % i, dtype=np.float32, shape=None)
        node5 = gs.Node("MatMul", "MMD-" + str(i), inputs=[tensor4, constantNxK], outputs=[tensor5])
        graphNodeList.append(node5)

        tensor6 = gs.Variable("tensor%d-5" % i, dtype=np.float32, shape=None)
        node6 = gs.Node("Add", "AddD-" + str(i), inputs=[tensor5, constantK], outputs=[tensor6])
        graphNodeList.append(node6)

        tensor7 = gs.Variable("tensor%d-6" % i, dtype=np.float32, shape=None)
        node7 = gs.Node("Relu", "ReLUD-" + str(i), inputs=[tensor6], outputs=[tensor7])
        graphNodeList.append(node7)

        tensorLoop = tensor7

    tensor8 = gs.Variable("tensor8", dtype=np.float32, shape=None)
    node8 = gs.Node("ReduceSum", "Reduce", inputs=[tensorLoop, constantM1], outputs=[tensor8], attrs=OrderedDict([('keepdims', 0)]))
    graphNodeList.append(node8)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor8], opset=13)

    onnxFile = "model-%d-%d-%d.onnx" % (nM, nK, nN)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile)
    print("Succeeded building %s!" % (onnxFile))
    os.system("trtexec --onnx=%s --useCudaGraph --noDataTransfers --fp16" % onnxFile)
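# A hedged driver sketch for run(); the sizes below are placeholders, and trtexec
# must be on PATH for the final benchmarking step to succeed.
nLoop = 10  # assumed loop depth; the original defines this constant elsewhere
run(32, 256, 256)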
def onnx_reshape(nodes, layer_name, input_node, output_shape, value):
    inputs = [input_node]
    inputs.append(gs.Constant(layer_name + '_constant', np.asarray(value, np.int64)))
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Reshape", inputs=inputs, outputs=[output_node])
    nodes.append(node)
    return output_node
def run(self, args):
    # G_LOGGER comes from polygraphy.logger (from polygraphy.logger import G_LOGGER).
    _, graph = super().import_graph(args)
    TENSOR_MAP = graph.tensors()

    def get_tensor(name):
        if name not in TENSOR_MAP:
            G_LOGGER.critical("Tensor: {:} does not exist in the model.".format(name))
        return TENSOR_MAP[name]

    # We populate outputs first because we may need to update output nodes from the
    # input tensors if output == input.
    output_tensors = []
    for name in args.outputs:
        if name in args.inputs:
            tensor = gs.Variable(name="{:}_polygraphy_surgeon_insert_output".format(name))
            # Bind outputs to outputs of original inputs.
            # This construct is required to preserve ordering of the input tensors in the output nodes.
            for out in get_tensor(name).outputs:
                for index, inp in enumerate(out.inputs):
                    if inp.name == name:
                        out.inputs[index] = tensor
            G_LOGGER.verbose("Generating new tensor for output: {:}".format(tensor))
        else:
            tensor = get_tensor(name)
            tensor.inputs.clear()
        output_tensors.append(tensor)

        if not tensor.outputs:
            for index, out in enumerate(graph.outputs):
                if out.name == name:
                    graph.outputs[index] = tensor

    input_tensors = []
    for name in args.inputs:
        tensor = get_tensor(name)
        tensor.outputs.clear()
        input_tensors.append(tensor)

    new_node = gs.Node(op=args.op, name=args.name, inputs=input_tensors, outputs=output_tensors)
    G_LOGGER.verbose("Generated new node: {:}".format(new_node))
    graph.nodes.append(new_node)

    # Since new graph outputs may be added, and we don't know the types, we skip
    # type checks in ONNX-GraphSurgeon.
    super().export_graph(graph, args, do_type_check=False)
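# For reference, this run() implements Polygraphy's `surgeon insert` subtool; a
# typical CLI invocation (paths and tensor/op names here are illustrative) might
# look like:
#
#   polygraphy surgeon insert model.onnx -o modified.onnx \
#       --inputs x --outputs y --op CustomOp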
def layer_reshape(self, layer_name, input_node, output_shape, value):
    inputs = [input_node]
    inputs.append(self.layer_constant(layer_name + '_constant', np.asarray(value, np.int64)))
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Reshape", inputs=inputs, outputs=[output_node])
    self.node.append(node)
    return output_node
def layer_unsqueeze(self, layer_name, input_node, output_shape, axes=0):
    attrs = {"axes": [axes]}
    inputs = [input_node]
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Unsqueeze", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
def onnx_concat(nodes, layer_name, input_node=[], output_shape=(), axis=1):
    attrs = {"axis": axis}
    inputs = input_node  # list of input tensors to concatenate
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Concat", inputs=inputs, outputs=[output_node], attrs=attrs)
    nodes.append(node)
    return output_node
def layer_concat(self, layer_name, input_node=[], output_shape=(), axis=1):
    attrs = {"axis": axis}
    inputs = input_node  # list of input tensors to concatenate
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Concat", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
def layer_softmax(self, layer_name, input_node, output_shape, axis=1):
    attrs = {"axis": axis}
    inputs = [input_node]
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Softmax", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
def layer_transpose(self, layer_name, input_node, output_shape, perm=[0, 1, 2, 3]):
    attrs = {"perm": perm}
    inputs = [input_node]
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Transpose", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
def onnx_transpose(nodes, layer_name, input_node, output_shape, perm=[0, 1, 2, 3]):
    attrs = {"perm": perm}
    inputs = [input_node]
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Transpose", inputs=inputs, outputs=[output_node], attrs=attrs)
    nodes.append(node)
    return output_node
def layer_lrelu(self, layer_name, input_node, output_shape, alpha_lrelu=0.1):
    attrs = {"alpha": alpha_lrelu}
    inputs = [input_node]
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="LeakyRelu", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
def modify_onehot(graph):
    for node in graph.nodes:
        if node.op == "OneHot":
            depth = node.inputs[1].values
            attrs = {"depth": int(depth)}
            onehot = gs.Node(op="OnehotPlugin", name=node.name, attrs=attrs)
            graph.nodes.append(onehot)

            inp_output_tensor = node.inputs[0]
            inp_output_tensor.outputs = [onehot]
            onehot.outputs = node.outputs
            node.outputs.clear()
            print(onehot)

    # Remove the now-unused nodes from the graph completely
    graph.cleanup()
    return graph
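# A minimal driver sketch for modify_onehot(); the file names are placeholders, and
# the OnehotPlugin must be registered in TensorRT for the exported model to load.
def _demo_modify_onehot():
    graph = gs.import_onnx(onnx.load("model_with_onehot.onnx"))
    graph = modify_onehot(graph)
    onnx.save(gs.export_onnx(graph), "model_with_onehot_plugin.onnx")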
def layer_clamp(self, layer_name, input_node, output_shape, min_value=0.0, max_value=1.0):
    inputs = [input_node]
    inputs.append(self.layer_constant(layer_name + "_min", np.array([min_value], np.float32)))
    inputs.append(self.layer_constant(layer_name + "_max", np.array([max_value], np.float32)))
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="Clip", inputs=inputs, outputs=[output_node])
    self.node.append(node)
    return output_node
def layer_bn(self, layer_name, input_node, output_shape):
    attrs = {"epsilon": self.epsilon_bn, "momentum": self.momentum_bn}
    inputs = [input_node]
    param_names = [
        layer_name + ".weight",
        layer_name + ".bias",
        layer_name + ".running_mean",
        layer_name + ".running_var",
    ]
    for param in param_names:
        inputs.append(self.layer_constant(param, self.weights[param]))
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="BatchNormalization", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
def create_and_add_plugin_node(graph, args):
    batch_size = graph.inputs[0].shape[0]

    tensors = graph.tensors()
    boxes_tensor = tensors["boxes"]
    confs_tensor = tensors["confs"]
    keepTopK = int(args.keepTopK)

    num_detections = gs.Variable(name="num_detections").to_variable(dtype=np.int32, shape=[batch_size, 1])
    nmsed_boxes = gs.Variable(name="nmsed_boxes").to_variable(dtype=np.float32, shape=[batch_size, keepTopK, 4])
    nmsed_scores = gs.Variable(name="nmsed_scores").to_variable(dtype=np.float32, shape=[batch_size, keepTopK])
    nmsed_classes = gs.Variable(name="nmsed_classes").to_variable(dtype=np.float32, shape=[batch_size, keepTopK])
    new_outputs = [num_detections, nmsed_boxes, nmsed_scores, nmsed_classes]

    nms_node = gs.Node(
        op="BatchedNMSDynamic_TRT",
        attrs={
            "shareLocation": 1,
            "backgroundLabelId": -1,
            "numClasses": int(args.nbCls),
            "topK": int(args.topK),
            "keepTopK": keepTopK,
            "scoreThreshold": float(args.score),
            "iouThreshold": float(args.iou),
            "isNormalized": 1,
            "clipBoxes": 1,
            "plugin_version": "1",
        },
        inputs=[boxes_tensor, confs_tensor],
        outputs=new_outputs,
    )

    graph.nodes.append(nms_node)
    graph.outputs = new_outputs

    return graph.cleanup().toposort()
def layer_avgpool(self, layer_name, input_node, output_shape, kernel_shape=(2, 2), strides=(2, 2), pads=(0, 0, 0, 0)):
    attrs = {
        "kernel_shape": kernel_shape,
        "strides": strides,
        "pads": pads,
    }
    inputs = [input_node]
    output_node = self.layer_variable(layer_name, output_shape)
    node = gs.Node(op="AveragePool", inputs=inputs, outputs=[output_node], attrs=attrs)
    self.node.append(node)
    return output_node
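# The layer_* methods above are written against a small builder class that is not
# shown in this collection. A hedged sketch of the minimal context they assume
# (self.node, self.weights, and the two helper factories); the class name and
# default hyperparameters here are assumptions, not the original definitions:
class OnnxBuilder:
    def __init__(self, weights=None, epsilon_bn=1e-5, momentum_bn=0.9):
        self.node = []                 # accumulated gs.Node objects
        self.weights = weights or {}   # parameter name -> np.ndarray
        self.epsilon_bn = epsilon_bn   # assumed default; original value not shown
        self.momentum_bn = momentum_bn  # assumed default; original value not shown

    def layer_constant(self, name, values):
        return gs.Constant(name, np.ascontiguousarray(values))

    def layer_variable(self, name, shape, dtype=np.float32):
        return gs.Variable(name, dtype, shape)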
def append_nms(graph, num_classes, scoreThreshold, iouThreshold, keepTopK):
    out_tensors = graph.outputs
    bs = out_tensors[0].shape[0]

    nms_attrs = {
        'shareLocation': True,
        'backgroundLabelId': -1,
        'numClasses': num_classes,
        'topK': 1024,
        'keepTopK': keepTopK,
        'scoreThreshold': scoreThreshold,
        'iouThreshold': iouThreshold,
        'isNormalized': True,
        'clipBoxes': True,
    }

    nms_num_detections = gs.Variable(name="nms_num_detections", dtype=np.int32, shape=(bs, 1))
    nms_boxes = gs.Variable(name="nms_boxes", dtype=np.float32, shape=(bs, keepTopK, 4))
    nms_scores = gs.Variable(name="nms_scores", dtype=np.float32, shape=(bs, keepTopK))
    nms_classes = gs.Variable(name="nms_classes", dtype=np.float32, shape=(bs, keepTopK))

    nms = gs.Node(op="BatchedNMSDynamic_TRT",
                  attrs=nms_attrs,
                  inputs=out_tensors,
                  outputs=[nms_num_detections, nms_boxes, nms_scores, nms_classes])

    graph.nodes.append(nms)
    graph.outputs = [nms_num_detections, nms_boxes, nms_scores, nms_classes]
    return graph
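# A hedged end-to-end sketch for append_nms(); the model path and thresholds are
# illustrative, and the exported model only loads where the BatchedNMSDynamic_TRT
# TensorRT plugin is available.
def _demo_append_nms():
    graph = gs.import_onnx(onnx.load("detector.onnx"))
    graph = append_nms(graph, num_classes=80, scoreThreshold=0.25, iouThreshold=0.45, keepTopK=100)
    graph.cleanup().toposort()
    onnx.save(gs.export_onnx(graph), "detector_nms.onnx")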