def make_multi_input_output():
    DTYPE = np.float32
    SHAPE = (1,)

    X0 = gs.Variable("X0", dtype=DTYPE, shape=SHAPE)
    Y0 = gs.Variable("Y0", dtype=DTYPE, shape=SHAPE)

    graph = gs.Graph(inputs=[X0, Y0])

    X1 = graph.identity(X0)
    Y1 = graph.identity(Y0)

    Z0 = graph.add(X1, Y1)

    Z1 = graph.identity(Z0)
    Z1.dtype = DTYPE
    Z1.shape = SHAPE

    Z2 = graph.identity(Z0)
    Z2.dtype = DTYPE
    Z2.shape = SHAPE

    graph.outputs = [Z1, Z2]

    save(graph, "reducable.onnx")
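# The function above assumes that `identity` and `add` have been registered as
# graph helpers and that a `save` utility exists. A minimal sketch of those
# assumed definitions, mirroring the registration pattern used elsewhere in
# these examples (the bodies here are illustrative, not the originals):
import numpy as np
import onnx
import onnx_graphsurgeon as gs

@gs.Graph.register()
def identity(self, inp):
    return self.layer(op="Identity", inputs=[inp], outputs=["identity_out"])[0]

@gs.Graph.register()
def add(self, a, b):
    return self.layer(op="Add", inputs=[a, b], outputs=["add_out"])[0]

def save(graph, path):
    # Drop dangling tensors and sort topologically before export
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), path)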
def save_model(self):
    # Note that initializers do not necessarily have to be graph inputs
    graph = gs.Graph(nodes=self.node, inputs=self.inputs, outputs=self.outputs)
    # print(onnx.helper.printable_graph(graph))
    onnx.save(gs.export_onnx(graph), self.output_file_path)

    # Verify that the saved .onnx model is well-formed
    onnx_model = onnx.load(self.output_file_path)
    onnx.checker.check_model(onnx_model)
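# `save_model` is a method; a minimal hypothetical class context, with the
# attribute names inferred from the method body, might look like this:
class OnnxModelWriter:
    def __init__(self, nodes, inputs, outputs, output_file_path):
        self.node = nodes                          # list of gs.Node objects
        self.inputs = inputs                       # list of graph input tensors
        self.outputs = outputs                     # list of graph output tensors
        self.output_file_path = output_file_path   # destination .onnx path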
def run(nM, nK, nN):
    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])
    constant1xK = gs.Constant("constant1xK", np.ascontiguousarray(np.random.rand(1, nK).reshape(1, nK).astype(np.float32) * 2 - 1))
    constantKxN = gs.Constant("constantKxN", np.ascontiguousarray(np.random.rand(nK, nN).reshape(nK, nN).astype(np.float32) * 2 - 1))
    constantN = gs.Constant("constantN", np.ascontiguousarray(np.random.rand(nN).astype(np.float32) * 2 - 1))
    constantNxK = gs.Constant("constantNxK", np.ascontiguousarray(np.random.rand(nN, nK).reshape(nN, nK).astype(np.float32) * 2 - 1))
    constantK = gs.Constant("constantK", np.ascontiguousarray(np.random.rand(nK).astype(np.float32) * 2 - 1))
    constantM1 = gs.Constant("constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    tensor1 = gs.Variable("tensor1", np.float32, None)
    node1 = gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[tensor1])
    graphNodeList.append(node1)

    tensorLoop = tensor1
    for i in range(nLoop):
        tensor2 = gs.Variable("tensor%d-1" % i, np.float32, None)
        node2 = gs.Node("MatMul", "MMU-" + str(i), inputs=[tensorLoop, constantKxN], outputs=[tensor2])
        graphNodeList.append(node2)

        tensor3 = gs.Variable("tensor%d-2" % i, dtype=np.float32, shape=None)
        node3 = gs.Node("Add", "AddU-" + str(i), inputs=[tensor2, constantN], outputs=[tensor3])
        graphNodeList.append(node3)

        tensor4 = gs.Variable("tensor%d-3" % i, dtype=np.float32, shape=None)
        node4 = gs.Node("Relu", "ReLUU-" + str(i), inputs=[tensor3], outputs=[tensor4])
        graphNodeList.append(node4)

        tensor5 = gs.Variable("tensor%d-4" % i, dtype=np.float32, shape=None)
        node5 = gs.Node("MatMul", "MMD-" + str(i), inputs=[tensor4, constantNxK], outputs=[tensor5])
        graphNodeList.append(node5)

        tensor6 = gs.Variable("tensor%d-5" % i, dtype=np.float32, shape=None)
        node6 = gs.Node("Add", "AddD-" + str(i), inputs=[tensor5, constantK], outputs=[tensor6])
        graphNodeList.append(node6)

        tensor7 = gs.Variable("tensor%d-6" % i, dtype=np.float32, shape=None)
        node7 = gs.Node("Relu", "ReLUD-" + str(i), inputs=[tensor6], outputs=[tensor7])
        graphNodeList.append(node7)

        tensorLoop = tensor7

    tensor8 = gs.Variable("tensor8", dtype=np.float32, shape=None)
    node8 = gs.Node("ReduceSum", "Reduce", inputs=[tensorLoop, constantM1], outputs=[tensor8], attrs=OrderedDict([('keepdims', 0)]))
    graphNodeList.append(node8)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor8], opset=13)

    onnxFile = "model-%d-%d-%d.onnx" % (nM, nK, nN)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile)
    print("Succeeded building %s!" % onnxFile)

    os.system("trtexec --onnx=%s --useCudaGraph --noDataTransfers --fp16" % onnxFile)
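# The function above relies on a module-level `nLoop` plus several imports. A
# hypothetical prelude and invocation (the value of nLoop and the problem sizes
# are assumptions chosen for illustration):
import os
from collections import OrderedDict

import numpy as np
import onnx
import onnx_graphsurgeon as gs

nLoop = 10  # assumed: number of repeated MatMul+Add+Relu stacks

run(32, 256, 2048)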
def make_constant_linear():
    DTYPE = np.float32
    SHAPE = (4, 4)

    graph = gs.Graph()

    X0 = graph.constant(gs.Constant("const", values=np.ones(SHAPE, dtype=DTYPE)))
    # Explicitly clear shape to trigger the failure condition in reduce
    X0.shape = None

    X1 = graph.identity(X0)
    X2 = graph.identity(X1)
    X2.dtype = DTYPE
    X2.shape = SHAPE

    graph.outputs = [X2]

    save(graph, "reducable_with_const.onnx")
constant0 = gs.Constant(name="constant0", values=np.ones(shape=[1, 3, 3, 3], dtype=np.float32))  # define tensors (constants)
constant1 = gs.Constant(name="constant1", values=np.ones(shape=[1], dtype=np.float32))

node0 = gs.Node(name="myConv", op="Conv", inputs=[tensor0, constant0], outputs=[tensor1])  # define a node, using tensors as its inputs and outputs
node0.attrs = OrderedDict([
    ['dilations', [1, 1]],
    ['kernel_shape', [3, 3]],
    ['pads', [1, 1, 1, 1]],
    ['strides', [1, 1]],
])  # attributes of the node
node1 = gs.Node(name="myAdd", op="Add", inputs=[tensor1, constant1], outputs=[tensor2])
node2 = gs.Node(name="myRelu", op="Relu", inputs=[tensor2], outputs=[tensor3])

graph = gs.Graph(nodes=[node0, node1, node2], inputs=[tensor0], outputs=[tensor3])  # define the graph; all nodes and the input/output tensors must be given

graph.cleanup().toposort()  # housekeeping before saving the graph; see 06-Fold.py for details
onnx.save(gs.export_onnx(graph), "model-01.onnx")
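# An optional sanity check of the model saved above (a small sketch, assuming
# onnx is available; it raises if the model is malformed):
model = onnx.load("model-01.onnx")
onnx.checker.check_model(model)
print(onnx.helper.printable_graph(model.graph))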
import onnx_graphsurgeon as gs  # needed for the gs.* calls below
import numpy as np
import onnx

# Computes Y = x0 + (a * x1 + b)

shape = (1, 3, 224, 224)

# Inputs
x0 = gs.Variable(name="x0", dtype=np.float32, shape=shape)
x1 = gs.Variable(name="x1", dtype=np.float32, shape=shape)

# Intermediate tensors
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
mul_out = gs.Variable(name="mul_out")
add_out = gs.Variable(name="add_out")

# Outputs
Y = gs.Variable(name="Y", dtype=np.float32, shape=shape)

nodes = [
    # mul_out = a * x1
    gs.Node(op="Mul", inputs=[a, x1], outputs=[mul_out]),
    # add_out = mul_out + b
    gs.Node(op="Add", inputs=[mul_out, b], outputs=[add_out]),
    # Y = x0 + add_out
    gs.Node(op="Add", inputs=[x0, add_out], outputs=[Y]),
]

graph = gs.Graph(nodes=nodes, inputs=[x0, x1], outputs=[Y])
onnx.save(gs.export_onnx(graph), "model.onnx")
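# A hedged sanity check with onnxruntime (assuming it is installed): with
# a == b == 1, the model should compute Y = x0 + x1 + 1.
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
x0_data = np.zeros(shape, dtype=np.float32)
x1_data = np.ones(shape, dtype=np.float32)
(y,) = sess.run(None, {"x0": x0_data, "x1": x1_data})
assert np.allclose(y, x0_data + x1_data + 1.0)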
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

X = gs.Variable(name="X", dtype=np.float32, shape=(1, 3, 224, 224))
# Since W is a Constant, it will automatically be exported as an initializer
W = gs.Constant(name="W", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32))
Y = gs.Variable(name="Y", dtype=np.float32, shape=(1, 5, 222, 222))

node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])

# Note that initializers do not necessarily have to be graph inputs
graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
onnx.save(gs.export_onnx(graph), "test_conv.onnx")
@gs.Graph.register()
def min(self, *args):
    return self.layer(op="Min", inputs=args, outputs=["min_out"])[0]

@gs.Graph.register()
def max(self, *args):
    return self.layer(op="Max", inputs=args, outputs=["max_out"])[0]

@gs.Graph.register()
def identity(self, inp):
    return self.layer(op="Identity", inputs=[inp], outputs=["identity_out"])[0]

# Generate the graph
graph = gs.Graph()

graph.inputs = [gs.Variable("input", shape=(4, 4), dtype=np.float32)]

# Clip values to [0, 6]
MIN_VAL = np.array(0, np.float32)
MAX_VAL = np.array(6, np.float32)

# Add identity nodes to make the graph structure a bit more interesting
inp = graph.identity(graph.inputs[0])
max_out = graph.max(graph.min(inp, MAX_VAL), MIN_VAL)
graph.outputs = [
    graph.identity(max_out),
]

# Graph outputs must include dtype information
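# The snippet breaks off after the comment above. A hedged completion: set the
# dtype on the graph output as the comment requires, then export (the output
# file name is an assumption):
graph.outputs[0].dtype = np.float32
onnx.save(gs.export_onnx(graph), "model.onnx")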
@gs.Graph.register(opsets=[11])
def relu(self, a):
    return propagate_dtype(self.layer(op="Relu", inputs=[a], outputs=["act_out_gs"]), a.dtype)

# Note that the same function can be defined in different ways for different opsets.
# It will only be called if the Graph's opset matches one of the opsets for which the function is registered.
# Hence, for the opset 11 graph used in this example, the following function will never be used.
@gs.Graph.register(opsets=[1])
def relu(self, a):
    raise NotImplementedError("This function has not been implemented!")

##########################################################################################################
# The functions registered above greatly simplify the process of building the graph itself.
graph = gs.Graph(opset=11)

# Generates a graph which computes:
# output = ReLU((A * X^T) + B) (.) C + D
X = gs.Variable(name="X", shape=(64, 64), dtype=np.float32)
graph.inputs = [X]

# axt = (A * X^T)
# Note that we can use NumPy arrays directly (e.g. Tensor A),
# instead of Constants. These will automatically be converted to Constants.
A = np.ones(shape=(64, 64), dtype=np.float32)
axt = graph.gemm(A, X, trans_b=True)

# dense = ReLU(axt + B)
B = np.ones((64, 64), dtype=np.float32) * 0.5
dense = graph.relu(*graph.add(*axt, B))
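# The snippet above relies on helpers registered earlier in the example. Hedged
# sketches of what they plausibly look like, with the signatures inferred from
# how they are used here (these bodies are assumptions, not the originals):
def propagate_dtype(outputs, dtype):
    # Tag each produced tensor with a dtype so downstream layers can read it
    for output in outputs:
        output.dtype = dtype
    return outputs

@gs.Graph.register(opsets=[11])
def gemm(self, a, b, trans_a=False, trans_b=False):
    attrs = {"transA": int(trans_a), "transB": int(trans_b)}
    return self.layer(op="Gemm", inputs=[a, b], outputs=["gemm_out_gs"], attrs=attrs)

@gs.Graph.register(opsets=[11])
def add(self, a, b):
    return self.layer(op="Add", inputs=[a, b], outputs=["add_out_gs"])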
                outputs=[tensor8])
node2 = gs.Node(name="myAdd2", op="Add", inputs=[tensor0, tensor8], outputs=[tensor1])  # useful node
node3 = gs.Node(name="myAdd3", op="Add", inputs=[tensor1, constant0], outputs=[tensor2])  # useful node
node4 = gs.Node(name="myAdd4", op="Add", inputs=[tensor5, constant0], outputs=[tensor6])  # useless node

graph = gs.Graph(nodes=[node4, node3, node2, node1, node0], inputs=[tensor0, tensor3], outputs=[tensor2, tensor4])

onnx.save(gs.export_onnx(graph), "model-06-01.onnx")
# The original graph: note the 4 dangling tensors, 1 dangling node, and 1 constant-computation chain

onnx.save(gs.export_onnx(graph.fold_constants()), "model-06-02.onnx")
# The graph after constant folding: the constant chain is merged into the main chain,
# leaving 2 extra dangling Add nodes. Note that constant folding does not fuse nodes,
# so the two Add nodes on the main chain are not merged.

onnx.save(gs.export_onnx(graph.fold_constants().cleanup()), "model-06-03.onnx")
# The graph after cleanup: the 3 useless Add nodes have been removed

print("Before toposort:")  # original node order
for index, node in enumerate(graph.nodes):
    print("No.%d->%s" % (index, node.name))

print("After toposort:")  # node order after topological sorting; nodes roughly follow the computation order of the graph
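# The snippet appears truncated: the "After toposort" print presumably follows
# an actual toposort call and a second enumeration, e.g.:
graph.toposort()
for index, node in enumerate(graph.nodes):
    print("No.%d->%s" % (index, node.name))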
def run(nM, nK, nN):
    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])
    constant1xK = gs.Constant("constant1xK", np.ascontiguousarray(np.random.rand(1, nK).reshape(1, nK).astype(np.float32) * 2 - 1))
    constantKxN = gs.Constant("constantKxN", np.ascontiguousarray(np.random.rand(nK, nN).reshape(nK, nN).astype(np.float32) * 2 - 1))
    constantN = gs.Constant("constantN", np.ascontiguousarray(np.random.rand(nN).astype(np.float32) * 2 - 1))
    constantNxK = gs.Constant("constantNxK", np.ascontiguousarray(np.random.rand(nN, nK).reshape(nN, nK).astype(np.float32) * 2 - 1))
    constantK = gs.Constant("constantK", np.ascontiguousarray(np.random.rand(nK).astype(np.float32) * 2 - 1))
    constantM1 = gs.Constant("constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    tensor1 = gs.Variable("tensor1", np.float32, None)
    node1 = gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[tensor1])
    graphNodeList.append(node1)

    tensorLoop = tensor1
    for i in range(nLoop):
        tensor2 = gs.Variable("tensor%d-1" % i, np.float32, None)
        node2 = gs.Node("MatMul", "MMU-" + str(i), inputs=[tensorLoop, constantKxN], outputs=[tensor2])
        graphNodeList.append(node2)

        tensor3 = gs.Variable("tensor%d-2" % i, dtype=np.float32, shape=None)
        node3 = gs.Node("Add", "AddU-" + str(i), inputs=[tensor2, constantN], outputs=[tensor3])
        graphNodeList.append(node3)

        tensor4 = gs.Variable("tensor%d-3" % i, dtype=np.float32, shape=None)
        node4 = gs.Node("Relu", "ReLUU-" + str(i), inputs=[tensor3], outputs=[tensor4])
        graphNodeList.append(node4)

        tensor5 = gs.Variable("tensor%d-4" % i, dtype=np.float32, shape=None)
        node5 = gs.Node("MatMul", "MMD-" + str(i), inputs=[tensor4, constantNxK], outputs=[tensor5])
        graphNodeList.append(node5)

        tensor6 = gs.Variable("tensor%d-5" % i, dtype=np.float32, shape=None)
        node6 = gs.Node("Add", "AddD-" + str(i), inputs=[tensor5, constantK], outputs=[tensor6])
        graphNodeList.append(node6)

        tensor7 = gs.Variable("tensor%d-6" % i, dtype=np.float32, shape=None)
        node7 = gs.Node("Relu", "ReLUD-" + str(i), inputs=[tensor6], outputs=[tensor7])
        graphNodeList.append(node7)

        tensorLoop = tensor7

    tensor8 = gs.Variable("tensor8", dtype=np.float32, shape=None)
    node8 = gs.Node("ReduceSum", "Reduce", inputs=[tensorLoop, constantM1], outputs=[tensor8], attrs=OrderedDict([('keepdims', 0)]))
    graphNodeList.append(node8)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor8], opset=13)

    onnxFile = "model-%d-%d-%d.onnx" % (nM, nK, nN)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile)
    print("Succeeded building %s!" % onnxFile)

    logger = trt.Logger(trt.Logger.VERBOSE)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()
    config.max_workspace_size = 22 << 30
    parser = trt.OnnxParser(network, logger)
    with open(onnxFile, 'rb') as model:
        parser.parse(model.read())

    engineString = builder.build_serialized_network(network, config)
    planFile = onnxFile.split('.')[0] + ".plan"
    with open(planFile, 'wb') as f:
        f.write(engineString)
    print("Succeeded building %s!" % planFile)

    os.system("trtexec --loadEngine=%s --useCudaGraph --noDataTransfers --fp16" % planFile)
# Generates a model with multiple inputs/outputs. Something like:
#
#     X0    Y0
#     |     |
#     X1    Y1
#      \   /
#       Z0
#      /  \
#     Z1   Z2
#
DTYPE = np.float32
SHAPE = (1,)

X0 = gs.Variable("X0", dtype=DTYPE, shape=SHAPE)
Y0 = gs.Variable("Y0", dtype=DTYPE, shape=SHAPE)

graph = gs.Graph(inputs=[X0, Y0])

X1 = graph.identity(X0)
Y1 = graph.identity(Y0)

Z0 = graph.add(X1, Y1)

Z1 = graph.identity(Z0)
Z1.dtype = DTYPE
Z1.shape = SHAPE

Z2 = graph.identity(Z0)
Z2.dtype = DTYPE
Z2.shape = SHAPE

graph.outputs = [Z1, Z2]
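# The snippet stops after assigning outputs; a plausible final step (the file
# name here is an assumption) would export the graph:
onnx.save(gs.export_onnx(graph.cleanup().toposort()), "model_multi_io.onnx")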
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx
import onnx_graphsurgeon as gs
import numpy as np

x = gs.Variable(name="x", dtype=np.float32, shape=[1, 3, 5, 5])
_0 = gs.Variable(name="_0", dtype=np.float32, shape=[1, 3, 5, 5])
# Note: at runtime NonZero actually produces an int64 tensor whose shape is
# data-dependent; the dtype/shape declared for _1 and y here are placeholders.
_1 = gs.Variable(name="_1", dtype=np.float32, shape=[1, 3, 5, 5])
y = gs.Variable(name="y", dtype=np.float32, shape=[1, 3, 5, 5])

node0 = gs.Node(op="Identity", inputs=[x], outputs=[_0])
node1 = gs.Node(op="NonZero", inputs=[_0], outputs=[_1])
node2 = gs.Node(op="Identity", inputs=[_1], outputs=[y])

graph = gs.Graph(nodes=[node0, node1, node2], inputs=[x], outputs=[y])
onnx.save(gs.export_onnx(graph), "model-NonZero.onnx")
node5 = gs.Node("ReduceProd", "myReduceProd1", inputs=[tensor5], attrs={"axes": [0], "keepdims": int(True)}, outputs=[tensor6])  # value=(B*5), shape=()
node6 = gs.Node("Concat", "myConcat", inputs=[tensor4, tensor6], attrs={"axis": 0}, outputs=[tensor7])  # value=(A,3,B*5), shape=()
node7 = gs.Node("Reshape", "myReshape1", inputs=[tensor0, tensor7], outputs=[tensor8])  # shape=(A*3*B*5,)

graph = gs.Graph(nodes=[node0, node1, node2, node3, node4, node5, node6, node7], inputs=[tensor0], outputs=[tensor3, tensor8])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-07-01.onnx")

graph.inputs[0].shape = [2, 3, 4, 5]  # with a static shape, fold_constants can simplify the graph
graph.fold_constants().cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-07-02.onnx")
graphNodeList.append(node12)

tensor13 = gs.Variable("tensor-13", np.float32, None)
node13 = gs.Node("Add", "Add-13", inputs=[tensor12, constant10], outputs=[tensor13])
graphNodeList.append(node13)

tensor14 = gs.Variable("tensor-14", np.float32, None)
node14 = gs.Node("Softmax", "Softmax-14", inputs=[tensor13], outputs=[tensor14], attrs=OrderedDict([('axis', 1)]))
graphNodeList.append(node14)

tensor15 = gs.Variable("tensor-15", np.int64, None)
node15 = gs.Node("ArgMax", "ArgMax-15", inputs=[tensor14], outputs=[tensor15], attrs=OrderedDict([('axis', 1), ('keepdims', 0)]))
graphNodeList.append(node15)

graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor15])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), onnxFile)
print("Succeeded creating %s" % onnxFile)
def insert_decoder_loop(decoder_iter_onnx_path, output_dir, decoder_out_name, fp16):
    float_prec = np.float16 if fp16 else np.float32

    # Modify loop body so that it has 2+N inputs: (iteration_num, condition, loop carried dependencies...)
    # and 1+N+K outputs: (condition, loop carried dependencies..., scan_outputs...)

    # In this case, the loop carried dependencies include the following IN ORDER
    #    - decoder_output/decoder_input
    #    - attention_hidden
    #    - attention_cell
    #    - decoder_hidden
    #    - decoder_cell
    #    - attention_weights
    #    - attention_weights_cum
    #    - attention_context
    #    - not_finished (bool tensor, initialized to all True)
    #    - mel_lengths

    # The following are NOT loop carried dependencies (they remain constant through the loop),
    # and must be moved to be inputs outside of the loop body
    #    - memory
    #    - processed_memory
    #    - mask

    # The scan outputs are
    #    - mel_outputs (which scans across decoder_output)
    #    - gate_outputs (scans across gate_prediction)
    #    - alignments (scans across attention_weights)

    loop_body = gs.import_onnx(onnx.load(decoder_iter_onnx_path))
    loop_tensors = loop_body.tensors()

    iteration_num = gs.Variable("iteration_num", dtype=np.int64, shape=())
    cond_in = gs.Variable("cond_in", dtype=bool, shape=())
    cond_out = gs.Variable("cond_out", dtype=bool, shape=())
    not_finished_in = gs.Variable("not_finished_in", shape=('batch_size', 1), dtype=bool)
    not_finished_out = gs.Variable("not_finished_out", shape=('batch_size', 1), dtype=bool)
    mel_lengths_in = gs.Variable("mel_lengths_in", shape=('batch_size', 1), dtype=np.int32)
    mel_lengths_out = gs.Variable("mel_lengths_out", shape=('batch_size', 1), dtype=np.int32)

    # Set loop body inputs in the correct order
    loop_body.inputs = [
        iteration_num,
        cond_in,
        loop_tensors["decoder_input"],
        loop_tensors["attention_hidden"],
        loop_tensors["attention_cell"],
        loop_tensors["decoder_hidden"],
        loop_tensors["decoder_cell"],
        loop_tensors["attention_weights"],
        loop_tensors["attention_weights_cum"],
        loop_tensors["attention_context"],
        not_finished_in,
        mel_lengths_in,
    ]

    # Set loop body outputs in the correct order
    loop_body.outputs = [
        cond_out,
        loop_tensors["decoder_output"],
        loop_tensors["out_attention_hidden"],
        loop_tensors["out_attention_cell"],
        loop_tensors["out_decoder_hidden"],
        loop_tensors["out_decoder_cell"],
        loop_tensors["out_attention_weights"],
        loop_tensors["out_attention_weights_cum"],
        loop_tensors["out_attention_context"],
        not_finished_out,
        mel_lengths_out,
        loop_tensors["decoder_output"],
        loop_tensors["gate_prediction"],
        loop_tensors["out_attention_weights"],
    ]

    # The loop stop condition is given by the following lines in PyTorch:
    #     dec = torch.le(torch.sigmoid(decoder_outputs[8]), gate_threshold).to(torch.int32).squeeze(1)
    #     not_finished = not_finished * dec
    #     if torch.sum(not_finished) == 0:
    #         break
    # To compute cond_out, we can essentially follow the same steps.
    # Using Less instead of Greater+Not for now
    gate_threshold = gs.Constant("gate_threshold", np.array([0.5], dtype=float_prec))
    gate_sigmoid = gs.Variable("gate_sigmoid", dtype=float_prec, shape=())
    loop_body.nodes.append(
        gs.Node(op="Sigmoid", inputs=[loop_tensors["gate_prediction"]], outputs=[gate_sigmoid]))

    leq_output = gs.Variable("leq_output", dtype=bool)
    loop_body.nodes.append(
        gs.Node(op="Less", inputs=[gate_sigmoid, gate_threshold], outputs=[leq_output]))

    loop_body.nodes.append(
        gs.Node(op="And", inputs=[not_finished_in, leq_output], outputs=[not_finished_out]))

    cast_output = gs.Variable("cast_output", dtype=np.int32)
    loop_body.nodes.append(
        gs.Node(op="Cast", inputs=[not_finished_out], outputs=[cast_output], attrs={"to": 6}))  # 6 == int32

    reduce_output = gs.Variable("reduce_output", dtype=np.int32)
    loop_body.nodes.append(
        gs.Node(op="ReduceSum", inputs=[cast_output], outputs=[reduce_output], attrs={"axes": [0], "keepdims": 0}))

    unsqueezed_cond_out = gs.Variable("unsqueezed_cond_out", dtype=bool)
    loop_body.nodes.append(
        gs.Node(op="Equal",
                inputs=[reduce_output, gs.Constant("zero", np.array(0, dtype=np.int32))],
                outputs=[unsqueezed_cond_out]))

    squeezed_cond_out = gs.Variable("squeezed_cond_out", dtype=bool)
    loop_body.nodes.append(
        gs.Node(op="Squeeze", inputs=[unsqueezed_cond_out], outputs=[squeezed_cond_out], attrs={"axes": [0]}))

    loop_body.nodes.append(
        gs.Node(op="Not", inputs=[squeezed_cond_out], outputs=[cond_out]))

    # Compute mel_lengths
    # from PyTorch: mel_lengths += not_finished
    loop_body.nodes.append(
        gs.Node(op="Add", inputs=[mel_lengths_in, cast_output], outputs=[mel_lengths_out]))

    memory = gs.Variable("memory", dtype=float_prec, shape=('batch_size', 'seq_len', 512))
    processed_memory = gs.Variable("processed_memory", dtype=float_prec, shape=('batch_size', 'seq_len', 128))
    mask = gs.Variable("mask", dtype=bool, shape=('batch_size', 'seq_len'))

    loop_body.toposort()
    onnx.save(
        gs.export_onnx(loop_body),
        os.path.join(output_dir, "loop_body_{prec}.onnx".format(prec="fp16" if float_prec == np.float16 else "fp32")))

    # Create outer graph

    # Inputs to outer graph are the following (suffixed with _0 to signify initial states)
    #    - decoder_input_0
    #    - attention_hidden_0
    #    - attention_cell_0
    #    - decoder_hidden_0
    #    - decoder_cell_0
    #    - attention_weights_0
    #    - attention_weights_cum_0
    #    - attention_context_0
    #    - memory
    #    - processed_memory
    #    - mask

    # Outputs are the following
    #    - mel_outputs
    #    - mel_lengths

    # Note: alignments and gate_outputs are scan outputs, but don't seem to be used later in the PyTorch implementation.
    # For now, we will make them intermediate tensors that are not outputted.
    graph = gs.Graph()

    decoder_input_0 = gs.Variable("decoder_input_0", dtype=float_prec, shape=('batch_size', 80))
    attention_hidden_0 = gs.Variable("attention_hidden_0", dtype=float_prec, shape=('batch_size', 1024))
    attention_cell_0 = gs.Variable("attention_cell_0", dtype=float_prec, shape=('batch_size', 1024))
    decoder_hidden_0 = gs.Variable("decoder_hidden_0", dtype=float_prec, shape=('batch_size', 1024))
    decoder_cell_0 = gs.Variable("decoder_cell_0", dtype=float_prec, shape=('batch_size', 1024))
    attention_weights_0 = gs.Variable("attention_weights_0", dtype=float_prec, shape=('batch_size', 'seq_len'))
    attention_weights_cum_0 = gs.Variable("attention_weights_cum_0", dtype=float_prec, shape=('batch_size', 'seq_len'))
    attention_context_0 = gs.Variable("attention_context_0", dtype=float_prec, shape=('batch_size', 512))
    not_finished_0 = gs.Variable("not_finished_0", dtype=bool)
    mel_lengths_0 = gs.Variable("mel_lengths_0", dtype=np.int32)

    # For not_finished, we need to generate a tensor of shape (batch_size) that is all 1s
    # We can use the ONNX ConstantOfShape op to do this
    not_finished_shape = gs.Variable("not_finished_shape", dtype=np.int64)
    reduced = gs.Variable("reduced", dtype=float_prec)
    graph.nodes.append(
        gs.Node(op="ReduceSum", inputs=[decoder_input_0], outputs=[reduced], attrs={"axes": [1], "keepdims": 1}))
    graph.nodes.append(
        gs.Node(op="Shape", inputs=[reduced], outputs=[not_finished_shape]))
    before_cast = gs.Variable("before_cast", dtype=np.int32)
    graph.nodes.append(
        gs.Node(op="ConstantOfShape",
                inputs=[not_finished_shape],
                outputs=[before_cast],
                attrs={"value": gs.Constant("one", np.array([1], dtype=np.int32))}))
    graph.nodes.append(
        gs.Node(op="Cast", inputs=[before_cast], outputs=[not_finished_0], attrs={"to": 9}))  # 9 == bool

    # Same thing for mel_lengths, but we need all 0s
    graph.nodes.append(
        gs.Node(op="ConstantOfShape",
                inputs=[not_finished_shape],
                outputs=[mel_lengths_0],
                attrs={"value": gs.Constant("zero", np.array([0], dtype=np.int32))}))

    # Loop carried dependencies at the end of the loop
    decoder_input_t = gs.Variable("decoder_input_t", dtype=float_prec, shape=('batch_size', 80))
    attention_hidden_t = gs.Variable("attention_hidden_t", dtype=float_prec, shape=('batch_size', 1024))
    attention_cell_t = gs.Variable("attention_cell_t", dtype=float_prec, shape=('batch_size', 1024))
    decoder_hidden_t = gs.Variable("decoder_hidden_t", dtype=float_prec, shape=('batch_size', 1024))
    decoder_cell_t = gs.Variable("decoder_cell_t", dtype=float_prec, shape=('batch_size', 1024))
    attention_weights_t = gs.Variable("attention_weights_t", dtype=float_prec, shape=('batch_size', 'seq_len'))
    attention_weights_cum_t = gs.Variable("attention_weights_cum_t", dtype=float_prec, shape=('batch_size', 'seq_len'))
    attention_context_t = gs.Variable("attention_context_t", dtype=float_prec, shape=('batch_size', 512))
    not_finished_t = gs.Variable("not_finished_t", dtype=bool)
    mel_lengths_t = gs.Variable("mel_lengths_t", dtype=np.int32, shape=('batch_size', 1))

    # Scan outputs
    mel_outputs_raw = gs.Variable("mel_outputs_raw", dtype=float_prec, shape=(-1, 'batch_size', 80))
    gate_outputs = gs.Variable("gate_outputs", dtype=float_prec, shape=(-1, 'batch_size', 1))
    alignments = gs.Variable("alignments", dtype=float_prec, shape=(-1, 1, 'seq_len'))

    mel_outputs = gs.Variable("mel_outputs", dtype=float_prec, shape=('batch_size', 80, -1))

    graph.inputs = [
        decoder_input_0,
        attention_hidden_0,
        attention_cell_0,
        decoder_hidden_0,
        decoder_cell_0,
        attention_weights_0,
        attention_weights_cum_0,
        attention_context_0,
        memory,
        processed_memory,
        mask,
    ]

    graph.outputs = [mel_outputs, mel_lengths_t]

    # The trip count is an optional input in ONNX, but ONNX GraphSurgeon does not
    # support optional inputs. To work around this, after exporting the ONNX
    # ModelProto from GS, we replace this input with "".
    trip_count = gs.Constant("trip_count", np.array(0, dtype=np.int64))
    initial_cond = gs.Constant("initial_cond", np.array(True, dtype=bool))

    loop_inputs = [
        trip_count,
        initial_cond,
        decoder_input_0,
        attention_hidden_0,
        attention_cell_0,
        decoder_hidden_0,
        decoder_cell_0,
        attention_weights_0,
        attention_weights_cum_0,
        attention_context_0,
        not_finished_0,
        mel_lengths_0,
    ]
    loop_outputs = [
        decoder_input_t,
        attention_hidden_t,
        attention_cell_t,
        decoder_hidden_t,
        decoder_cell_t,
        attention_weights_t,
        attention_weights_cum_t,
        attention_context_t,
        not_finished_t,
        mel_lengths_t,
        mel_outputs_raw,
        gate_outputs,
        alignments,
    ]

    decoder_loop = gs.Node(op="Loop",
                           name="decoder_loop",
                           inputs=loop_inputs,
                           outputs=loop_outputs,
                           attrs={"body": loop_body})
    graph.nodes.append(decoder_loop)

    # Output needs to have the loop dimension as the innermost dim
    graph.nodes.append(
        gs.Node(op="Transpose", inputs=[mel_outputs_raw], outputs=[mel_outputs], attrs={"perm": [1, 2, 0]}))

    graph.toposort()
    exported_graph = gs.export_onnx(graph)
    # Remove the trip count input of the Loop node (see note above)
    [x for x in exported_graph.graph.node if x.name == "decoder_loop"][0].input[0] = ""
    onnx.save(exported_graph, os.path.join(output_dir, decoder_out_name))
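# A hypothetical invocation of the routine above (the paths and flags are
# assumptions for illustration; fp16=False produces the fp32 variant):
insert_decoder_loop("decoder_iter.onnx",
                    output_dir=".",
                    decoder_out_name="decoder_with_outer_loop.onnx",
                    fp16=False)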
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

# Inputs
x = gs.Variable(name="x", dtype=np.float32, shape=(1, 3, 224, 224))

# Intermediate tensors
i0 = gs.Variable(name="i0")
i1 = gs.Variable(name="i1")

# Outputs
y = gs.Variable(name="y", dtype=np.float32)

nodes = [
    gs.Node(op="Identity", inputs=[x], outputs=[i0]),
    gs.Node(op="FakeNodeToRemove", inputs=[i0], outputs=[i1]),
    gs.Node(op="Identity", inputs=[i1], outputs=[y]),
]

graph = gs.Graph(nodes=nodes, inputs=[x], outputs=[y])
onnx.save(gs.export_onnx(graph), "model.onnx")
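# This model exists to demonstrate node removal. A hedged sketch of how the
# fake node could be removed, using the standard GraphSurgeon pattern (reroute
# the consumers to the node's input, empty its outputs, then cleanup); the
# output file name is an assumption:
graph = gs.import_onnx(onnx.load("model.onnx"))
for node in graph.nodes:
    if node.op == "FakeNodeToRemove":
        inp_tensor = node.inputs[0]
        out_tensor = node.outputs[0]
        # Reconnect every consumer of this node's output to its input
        for consumer in list(out_tensor.outputs):
            index = consumer.inputs.index(out_tensor)
            consumer.inputs[index] = inp_tensor
        node.outputs = []  # makes the node dead so cleanup() drops it
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "removed.onnx")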
constant0 = gs.Constant(name="constant0", values=np.ones(shape=[1, 1, 1, 1], dtype=np.float32))

node0 = gs.Node(name="myIdentity0", op="Identity", inputs=[tensor0], outputs=[tensor1])
node1 = gs.Node(name="myAdd", op="Add", inputs=[tensor1, constant0], outputs=[tensor2])
node2 = gs.Node(name="myIdentity1", op="Identity", inputs=[tensor2], outputs=[tensor3])

graph = gs.Graph(nodes=[node0, node1, node2], inputs=[tensor0], outputs=[tensor3])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-03-01.onnx")

for node in graph.nodes:
    if node.op == 'Add' and node.name == 'myAdd':
        index = node.o().inputs.index(node.outputs[0])  # carefully locate the position of this tensor among the next node's inputs
        node.o().inputs[index] = node.inputs[0]  # reroute the next node's corresponding input to the Add node's own input tensor
        node.outputs = []  # the key step: empty the Add node's outputs so it becomes a dead node that can be removed automatically

graph.cleanup().toposort()  # cleanup automatically deletes the Add node
onnx.save(gs.export_onnx(graph), "model-03-02.onnx")
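# An optional follow-up check (a small sketch): confirm the Add node is gone
# from the saved model, leaving only the two Identity nodes.
m = onnx.load("model-03-02.onnx")
assert all(n.op_type != "Add" for n in m.graph.node)
print([n.op_type for n in m.graph.node])  # expect ['Identity', 'Identity']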
"ReLU-" + str(i), inputs=[tensor5], outputs=[tensor6]) graphNodeList.append(node6) tensorLoop = tensor6 tensor7 = gs.Variable("tensor-6", dtype=np.float32, shape=None) node7 = gs.Node("Conv", "Conv1", inputs=[tensorLoop, constant1x32], outputs=[tensor7]) graphNodeList.append(node7) graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor7], opset=13) onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile0) print("Succeeded building %s!" % (onnxFile0)) # 修改 .onnx graph = gs.import_onnx(onnx.load(onnxFile0)) constant32r = gs.Constant( "constant32r", np.ascontiguousarray( np.random.rand(1, nC, 1, 1).reshape(1, nC, 1, 1).astype(np.float32) * 2 - 1)) for node in graph.nodes:
import onnx_graphsurgeon as gs  # needed for the gs.* calls below
import numpy as np
import onnx

# Computes outputs = input + ((a + b) + d)

shape = (1, 3)

# Inputs
input = gs.Variable("input", shape=shape, dtype=np.float32)

# Intermediate tensors
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
c = gs.Variable("c")
d = gs.Constant("d", values=np.ones(shape=shape, dtype=np.float32))
e = gs.Variable("e")

# Outputs
output = gs.Variable("output", shape=shape, dtype=np.float32)

nodes = [
    # c = (a + b)
    gs.Node("Add", inputs=[a, b], outputs=[c]),
    # e = (c + d)
    gs.Node("Add", inputs=[c, d], outputs=[e]),
    # output = input + e
    gs.Node("Add", inputs=[input, e], outputs=[output]),
]

graph = gs.Graph(nodes=nodes, inputs=[input], outputs=[output])
onnx.save(gs.export_onnx(graph), "model.onnx")
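# Since (a + b) + d involves only constants, it can be folded away. A short
# follow-up sketch showing the folding step (the output file name is assumed):
graph = gs.import_onnx(onnx.load("model.onnx"))
graph.fold_constants().cleanup()  # folds the two constant Adds into a single initializer
onnx.save(gs.export_onnx(graph), "folded.onnx")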
import numpy as np
import onnx
import onnx_graphsurgeon as gs

tensor0 = gs.Variable(name="tensor0", dtype=np.float32, shape=['B', 3, 64, 64])
tensor1 = gs.Variable(name="tensor1", dtype=np.float32, shape=None)
tensor2 = gs.Variable(name="tensor2", dtype=np.float32, shape=None)

node0 = gs.Node(name="myIdentity0", op="Identity", inputs=[tensor0], outputs=[tensor1])
node1 = gs.Node(name="myIdentity1", op="Identity", inputs=[tensor1], outputs=[tensor2])

graph = gs.Graph(nodes=[node0, node1], inputs=[tensor0], outputs=[tensor2])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-02-01.onnx")

for node in graph.nodes:
    if node.op == 'Identity' and node.name == 'myIdentity0':  # traverse the graph to find the place where the node should be inserted
        constant0 = gs.Constant(name="constant0", values=np.ones(shape=[1, 1, 1, 1], dtype=np.float32))  # construct the new node and its tensors
        tensor3 = gs.Variable(name="tensor3", dtype=np.float32, shape=None)
        newNode = gs.Node(name="myAdd", op="Add", inputs=[node.outputs[0], constant0], outputs=[tensor3])
        graph.nodes.append(newNode)  # remember to add the new node to the graph
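# The snippet appears truncated: after appending the new node, the downstream
# node must be rewired to consume the new tensor. A hedged completion (the
# output file name is an assumption):
        index = node.o().inputs.index(node.outputs[0])  # position of tensor1 among the next node's inputs
        node.o().inputs[index] = tensor3  # the next node now reads tensor3 instead of tensor1

graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-02-02.onnx")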