def make_multi_input_output():
    DTYPE = np.float32
    SHAPE = (1,)

    X0 = gs.Variable("X0", dtype=DTYPE, shape=SHAPE)
    Y0 = gs.Variable("Y0", dtype=DTYPE, shape=SHAPE)

    graph = gs.Graph(inputs=[X0, Y0])

    X1 = graph.identity(X0)
    Y1 = graph.identity(Y0)

    Z0 = graph.add(X1, Y1)

    Z1 = graph.identity(Z0)
    Z1.dtype = DTYPE
    Z1.shape = SHAPE

    Z2 = graph.identity(Z0)
    Z2.dtype = DTYPE
    Z2.shape = SHAPE

    graph.outputs = [Z1, Z2]

    save(graph, "reducable.onnx")
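# The function above assumes that `identity` and `add` have been registered as
# graph helpers and that a `save` utility exists. A minimal sketch of those
# assumed definitions, mirroring the registration pattern used elsewhere in
# these examples (the bodies here are illustrative, not the originals):
import numpy as np
import onnx
import onnx_graphsurgeon as gs

@gs.Graph.register()
def identity(self, inp):
    return self.layer(op="Identity", inputs=[inp], outputs=["identity_out"])[0]

@gs.Graph.register()
def add(self, a, b):
    return self.layer(op="Add", inputs=[a, b], outputs=["add_out"])[0]

def save(graph, path):
    # Drop dangling tensors and sort topologically before export
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), path)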
def save_model(self):
    # Note that initializers do not necessarily have to be graph inputs
    graph = gs.Graph(nodes=self.node, inputs=self.inputs, outputs=self.outputs)
    # print(onnx.helper.printable_graph(graph))
    onnx.save(gs.export_onnx(graph), self.output_file_path)

    # Verify that the saved .onnx model is well-formed
    onnx_model = onnx.load(self.output_file_path)
    onnx.checker.check_model(onnx_model)
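# `save_model` is a method; a minimal hypothetical class context, with the
# attribute names inferred from the method body, might look like this:
class OnnxModelWriter:
    def __init__(self, nodes, inputs, outputs, output_file_path):
        self.node = nodes                          # list of gs.Node objects
        self.inputs = inputs                       # list of graph input tensors
        self.outputs = outputs                     # list of graph output tensors
        self.output_file_path = output_file_path   # destination .onnx path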
def run(nM, nK, nN):
    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])
    constant1xK = gs.Constant("constant1xK", np.ascontiguousarray(np.random.rand(1, nK).reshape(1, nK).astype(np.float32) * 2 - 1))
    constantKxN = gs.Constant("constantKxN", np.ascontiguousarray(np.random.rand(nK, nN).reshape(nK, nN).astype(np.float32) * 2 - 1))
    constantN = gs.Constant("constantN", np.ascontiguousarray(np.random.rand(nN).astype(np.float32) * 2 - 1))
    constantNxK = gs.Constant("constantNxK", np.ascontiguousarray(np.random.rand(nN, nK).reshape(nN, nK).astype(np.float32) * 2 - 1))
    constantK = gs.Constant("constantK", np.ascontiguousarray(np.random.rand(nK).astype(np.float32) * 2 - 1))
    constantM1 = gs.Constant("constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    tensor1 = gs.Variable("tensor1", np.float32, None)
    node1 = gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[tensor1])
    graphNodeList.append(node1)

    tensorLoop = tensor1
    for i in range(nLoop):
        tensor2 = gs.Variable("tensor%d-1" % i, np.float32, None)
        node2 = gs.Node("MatMul", "MMU-" + str(i), inputs=[tensorLoop, constantKxN], outputs=[tensor2])
        graphNodeList.append(node2)

        tensor3 = gs.Variable("tensor%d-2" % i, dtype=np.float32, shape=None)
        node3 = gs.Node("Add", "AddU-" + str(i), inputs=[tensor2, constantN], outputs=[tensor3])
        graphNodeList.append(node3)

        tensor4 = gs.Variable("tensor%d-3" % i, dtype=np.float32, shape=None)
        node4 = gs.Node("Relu", "ReLUU-" + str(i), inputs=[tensor3], outputs=[tensor4])
        graphNodeList.append(node4)

        tensor5 = gs.Variable("tensor%d-4" % i, dtype=np.float32, shape=None)
        node5 = gs.Node("MatMul", "MMD-" + str(i), inputs=[tensor4, constantNxK], outputs=[tensor5])
        graphNodeList.append(node5)

        tensor6 = gs.Variable("tensor%d-5" % i, dtype=np.float32, shape=None)
        node6 = gs.Node("Add", "AddD-" + str(i), inputs=[tensor5, constantK], outputs=[tensor6])
        graphNodeList.append(node6)

        tensor7 = gs.Variable("tensor%d-6" % i, dtype=np.float32, shape=None)
        node7 = gs.Node("Relu", "ReLUD-" + str(i), inputs=[tensor6], outputs=[tensor7])
        graphNodeList.append(node7)

        tensorLoop = tensor7

    tensor8 = gs.Variable("tensor8", dtype=np.float32, shape=None)
    node8 = gs.Node("ReduceSum", "Reduce", inputs=[tensorLoop, constantM1], outputs=[tensor8], attrs=OrderedDict([('keepdims', 0)]))
    graphNodeList.append(node8)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor8], opset=13)

    onnxFile = "model-%d-%d-%d.onnx" % (nM, nK, nN)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile)
    print("Succeeded building %s!" % onnxFile)

    os.system("trtexec --onnx=%s --useCudaGraph --noDataTransfers --fp16" % onnxFile)
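# The function above relies on a module-level `nLoop` plus several imports. A
# hypothetical prelude and invocation (the value of nLoop and the problem sizes
# are assumptions chosen for illustration):
import os
from collections import OrderedDict

import numpy as np
import onnx
import onnx_graphsurgeon as gs

nLoop = 10  # assumed: number of repeated MatMul+Add+Relu stacks

run(32, 256, 2048)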
def make_constant_linear():
    DTYPE = np.float32
    SHAPE = (4, 4)

    graph = gs.Graph()

    X0 = graph.constant(gs.Constant("const", values=np.ones(SHAPE, dtype=DTYPE)))
    # Explicitly clear shape to trigger the failure condition in reduce
    X0.shape = None

    X1 = graph.identity(X0)
    X2 = graph.identity(X1)
    X2.dtype = DTYPE
    X2.shape = SHAPE

    graph.outputs = [X2]

    save(graph, "reducable_with_const.onnx")
constant0 = gs.Constant(name="constant0", values=np.ones(shape=[1, 3, 3, 3], dtype=np.float32))  # define tensors (constants)
constant1 = gs.Constant(name="constant1", values=np.ones(shape=[1], dtype=np.float32))

node0 = gs.Node(name="myConv", op="Conv", inputs=[tensor0, constant0], outputs=[tensor1])  # define a node, using tensors as its inputs and outputs
node0.attrs = OrderedDict([
    ['dilations', [1, 1]],
    ['kernel_shape', [3, 3]],
    ['pads', [1, 1, 1, 1]],
    ['strides', [1, 1]],
])  # attributes of the node
node1 = gs.Node(name="myAdd", op="Add", inputs=[tensor1, constant1], outputs=[tensor2])
node2 = gs.Node(name="myRelu", op="Relu", inputs=[tensor2], outputs=[tensor3])

graph = gs.Graph(nodes=[node0, node1, node2], inputs=[tensor0], outputs=[tensor3])  # define the graph; all nodes and the input/output tensors must be given

graph.cleanup().toposort()  # housekeeping before saving the graph; see 06-Fold.py for details
onnx.save(gs.export_onnx(graph), "model-01.onnx")
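# An optional sanity check of the model saved above (a small sketch, assuming
# onnx is available; it raises if the model is malformed):
model = onnx.load("model-01.onnx")
onnx.checker.check_model(model)
print(onnx.helper.printable_graph(model.graph))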
import onnx_graphsurgeon as gs  # needed for the gs.* calls below
import numpy as np
import onnx

# Computes Y = x0 + (a * x1 + b)

shape = (1, 3, 224, 224)

# Inputs
x0 = gs.Variable(name="x0", dtype=np.float32, shape=shape)
x1 = gs.Variable(name="x1", dtype=np.float32, shape=shape)

# Intermediate tensors
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
mul_out = gs.Variable(name="mul_out")
add_out = gs.Variable(name="add_out")

# Outputs
Y = gs.Variable(name="Y", dtype=np.float32, shape=shape)

nodes = [
    # mul_out = a * x1
    gs.Node(op="Mul", inputs=[a, x1], outputs=[mul_out]),
    # add_out = mul_out + b
    gs.Node(op="Add", inputs=[mul_out, b], outputs=[add_out]),
    # Y = x0 + add_out
    gs.Node(op="Add", inputs=[x0, add_out], outputs=[Y]),
]

graph = gs.Graph(nodes=nodes, inputs=[x0, x1], outputs=[Y])
onnx.save(gs.export_onnx(graph), "model.onnx")
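# A hedged sanity check with onnxruntime (assuming it is installed): with
# a == b == 1, the model should compute Y = x0 + x1 + 1.
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
x0_data = np.zeros(shape, dtype=np.float32)
x1_data = np.ones(shape, dtype=np.float32)
(y,) = sess.run(None, {"x0": x0_data, "x1": x1_data})
assert np.allclose(y, x0_data + x1_data + 1.0)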
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

X = gs.Variable(name="X", dtype=np.float32, shape=(1, 3, 224, 224))
# Since W is a Constant, it will automatically be exported as an initializer
W = gs.Constant(name="W", values=np.ones(shape=(5, 3, 3, 3), dtype=np.float32))
Y = gs.Variable(name="Y", dtype=np.float32, shape=(1, 5, 222, 222))

node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])

# Note that initializers do not necessarily have to be graph inputs
graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
onnx.save(gs.export_onnx(graph), "test_conv.onnx")
@gs.Graph.register()
def min(self, *args):
    return self.layer(op="Min", inputs=args, outputs=["min_out"])[0]

@gs.Graph.register()
def max(self, *args):
    return self.layer(op="Max", inputs=args, outputs=["max_out"])[0]

@gs.Graph.register()
def identity(self, inp):
    return self.layer(op="Identity", inputs=[inp], outputs=["identity_out"])[0]

# Generate the graph
graph = gs.Graph()

graph.inputs = [gs.Variable("input", shape=(4, 4), dtype=np.float32)]

# Clip values to [0, 6]
MIN_VAL = np.array(0, np.float32)
MAX_VAL = np.array(6, np.float32)

# Add identity nodes to make the graph structure a bit more interesting
inp = graph.identity(graph.inputs[0])
max_out = graph.max(graph.min(inp, MAX_VAL), MIN_VAL)
graph.outputs = [
    graph.identity(max_out),
]

# Graph outputs must include dtype information
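# The snippet breaks off after the comment above. A hedged completion: set the
# dtype on the graph output as the comment requires, then export (the output
# file name is an assumption):
graph.outputs[0].dtype = np.float32
onnx.save(gs.export_onnx(graph), "model.onnx")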
@gs.Graph.register(opsets=[11])
def relu(self, a):
    return propagate_dtype(self.layer(op="Relu", inputs=[a], outputs=["act_out_gs"]), a.dtype)

# Note that the same function can be defined in different ways for different opsets.
# It will only be called if the Graph's opset matches one of the opsets for which the function is registered.
# Hence, for the opset 11 graph used in this example, the following function will never be used.
@gs.Graph.register(opsets=[1])
def relu(self, a):
    raise NotImplementedError("This function has not been implemented!")

##########################################################################################################
# The functions registered above greatly simplify the process of building the graph itself.
graph = gs.Graph(opset=11)

# Generates a graph which computes:
# output = ReLU((A * X^T) + B) (.) C + D
X = gs.Variable(name="X", shape=(64, 64), dtype=np.float32)
graph.inputs = [X]

# axt = (A * X^T)
# Note that we can use NumPy arrays directly (e.g. Tensor A),
# instead of Constants. These will automatically be converted to Constants.
A = np.ones(shape=(64, 64), dtype=np.float32)
axt = graph.gemm(A, X, trans_b=True)

# dense = ReLU(axt + B)
B = np.ones((64, 64), dtype=np.float32) * 0.5
dense = graph.relu(*graph.add(*axt, B))
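# The snippet above relies on helpers registered earlier in the example. Hedged
# sketches of what they plausibly look like, with the signatures inferred from
# how they are used here (these bodies are assumptions, not the originals):
def propagate_dtype(outputs, dtype):
    # Tag each produced tensor with a dtype so downstream layers can read it
    for output in outputs:
        output.dtype = dtype
    return outputs

@gs.Graph.register(opsets=[11])
def gemm(self, a, b, trans_a=False, trans_b=False):
    attrs = {"transA": int(trans_a), "transB": int(trans_b)}
    return self.layer(op="Gemm", inputs=[a, b], outputs=["gemm_out_gs"], attrs=attrs)

@gs.Graph.register(opsets=[11])
def add(self, a, b):
    return self.layer(op="Add", inputs=[a, b], outputs=["add_out_gs"])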
                outputs=[tensor8])
node2 = gs.Node(name="myAdd2", op="Add", inputs=[tensor0, tensor8], outputs=[tensor1])  # useful node
node3 = gs.Node(name="myAdd3", op="Add", inputs=[tensor1, constant0], outputs=[tensor2])  # useful node
node4 = gs.Node(name="myAdd4", op="Add", inputs=[tensor5, constant0], outputs=[tensor6])  # useless node

graph = gs.Graph(nodes=[node4, node3, node2, node1, node0], inputs=[tensor0, tensor3], outputs=[tensor2, tensor4])

onnx.save(gs.export_onnx(graph), "model-06-01.onnx")
# The original graph: note the 4 dangling tensors, 1 dangling node, and 1 constant-computation chain

onnx.save(gs.export_onnx(graph.fold_constants()), "model-06-02.onnx")
# The graph after constant folding: the constant chain is merged into the main chain,
# leaving 2 extra dangling Add nodes. Note that constant folding does not fuse nodes,
# so the two Add nodes on the main chain are not merged.

onnx.save(gs.export_onnx(graph.fold_constants().cleanup()), "model-06-03.onnx")
# The graph after cleanup: the 3 useless Add nodes have been removed

print("Before toposort:")  # original node order
for index, node in enumerate(graph.nodes):
    print("No.%d->%s" % (index, node.name))

print("After toposort:")  # node order after topological sorting; nodes roughly follow the computation order of the graph
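# The snippet appears truncated: the "After toposort" print presumably follows
# an actual toposort call and a second enumeration, e.g.:
graph.toposort()
for index, node in enumerate(graph.nodes):
    print("No.%d->%s" % (index, node.name))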
def run(nM, nK, nN):
    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])
    constant1xK = gs.Constant("constant1xK", np.ascontiguousarray(np.random.rand(1, nK).reshape(1, nK).astype(np.float32) * 2 - 1))
    constantKxN = gs.Constant("constantKxN", np.ascontiguousarray(np.random.rand(nK, nN).reshape(nK, nN).astype(np.float32) * 2 - 1))
    constantN = gs.Constant("constantN", np.ascontiguousarray(np.random.rand(nN).astype(np.float32) * 2 - 1))
    constantNxK = gs.Constant("constantNxK", np.ascontiguousarray(np.random.rand(nN, nK).reshape(nN, nK).astype(np.float32) * 2 - 1))
    constantK = gs.Constant("constantK", np.ascontiguousarray(np.random.rand(nK).astype(np.float32) * 2 - 1))
    constantM1 = gs.Constant("constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    tensor1 = gs.Variable("tensor1", np.float32, None)
    node1 = gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[tensor1])
    graphNodeList.append(node1)

    tensorLoop = tensor1
    for i in range(nLoop):
        tensor2 = gs.Variable("tensor%d-1" % i, np.float32, None)
        node2 = gs.Node("MatMul", "MMU-" + str(i), inputs=[tensorLoop, constantKxN], outputs=[tensor2])
        graphNodeList.append(node2)

        tensor3 = gs.Variable("tensor%d-2" % i, dtype=np.float32, shape=None)
        node3 = gs.Node("Add", "AddU-" + str(i), inputs=[tensor2, constantN], outputs=[tensor3])
        graphNodeList.append(node3)

        tensor4 = gs.Variable("tensor%d-3" % i, dtype=np.float32, shape=None)
        node4 = gs.Node("Relu", "ReLUU-" + str(i), inputs=[tensor3], outputs=[tensor4])
        graphNodeList.append(node4)

        tensor5 = gs.Variable("tensor%d-4" % i, dtype=np.float32, shape=None)
        node5 = gs.Node("MatMul", "MMD-" + str(i), inputs=[tensor4, constantNxK], outputs=[tensor5])
        graphNodeList.append(node5)

        tensor6 = gs.Variable("tensor%d-5" % i, dtype=np.float32, shape=None)
        node6 = gs.Node("Add", "AddD-" + str(i), inputs=[tensor5, constantK], outputs=[tensor6])
        graphNodeList.append(node6)

        tensor7 = gs.Variable("tensor%d-6" % i, dtype=np.float32, shape=None)
        node7 = gs.Node("Relu", "ReLUD-" + str(i), inputs=[tensor6], outputs=[tensor7])
        graphNodeList.append(node7)

        tensorLoop = tensor7

    tensor8 = gs.Variable("tensor8", dtype=np.float32, shape=None)
    node8 = gs.Node("ReduceSum", "Reduce", inputs=[tensorLoop, constantM1], outputs=[tensor8], attrs=OrderedDict([('keepdims', 0)]))
    graphNodeList.append(node8)

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor8], opset=13)

    onnxFile = "model-%d-%d-%d.onnx" % (nM, nK, nN)
    onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile)
    print("Succeeded building %s!" % onnxFile)

    logger = trt.Logger(trt.Logger.VERBOSE)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()
    config.max_workspace_size = 22 << 30
    parser = trt.OnnxParser(network, logger)
    with open(onnxFile, 'rb') as model:
        parser.parse(model.read())

    engineString = builder.build_serialized_network(network, config)
    planFile = onnxFile.split('.')[0] + ".plan"
    with open(planFile, 'wb') as f:
        f.write(engineString)
    print("Succeeded building %s!" % planFile)

    os.system("trtexec --loadEngine=%s --useCudaGraph --noDataTransfers --fp16" % planFile)
# Generates a model with multiple inputs/outputs. Something like:
#
#     X0    Y0
#     |     |
#     X1    Y1
#      \   /
#       Z0
#      /  \
#     Z1   Z2
#
DTYPE = np.float32
SHAPE = (1,)

X0 = gs.Variable("X0", dtype=DTYPE, shape=SHAPE)
Y0 = gs.Variable("Y0", dtype=DTYPE, shape=SHAPE)

graph = gs.Graph(inputs=[X0, Y0])

X1 = graph.identity(X0)
Y1 = graph.identity(Y0)

Z0 = graph.add(X1, Y1)

Z1 = graph.identity(Z0)
Z1.dtype = DTYPE
Z1.shape = SHAPE

Z2 = graph.identity(Z0)
Z2.dtype = DTYPE
Z2.shape = SHAPE

graph.outputs = [Z1, Z2]
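# The snippet stops after assigning outputs; a plausible final step (the file
# name here is an assumption) would export the graph:
onnx.save(gs.export_onnx(graph.cleanup().toposort()), "model_multi_io.onnx")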
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx
import onnx_graphsurgeon as gs
import numpy as np

x = gs.Variable(name="x", dtype=np.float32, shape=[1, 3, 5, 5])
_0 = gs.Variable(name="_0", dtype=np.float32, shape=[1, 3, 5, 5])
# Note: at runtime NonZero actually produces an int64 tensor whose shape is
# data-dependent; the dtype/shape declared for _1 and y here are placeholders.
_1 = gs.Variable(name="_1", dtype=np.float32, shape=[1, 3, 5, 5])
y = gs.Variable(name="y", dtype=np.float32, shape=[1, 3, 5, 5])

node0 = gs.Node(op="Identity", inputs=[x], outputs=[_0])
node1 = gs.Node(op="NonZero", inputs=[_0], outputs=[_1])
node2 = gs.Node(op="Identity", inputs=[_1], outputs=[y])

graph = gs.Graph(nodes=[node0, node1, node2], inputs=[x], outputs=[y])
onnx.save(gs.export_onnx(graph), "model-NonZero.onnx")
node5 = gs.Node("ReduceProd", "myReduceProd1", inputs=[tensor5], attrs={"axes": [0], "keepdims": int(True)}, outputs=[tensor6])  # value=(B*5), shape=()
node6 = gs.Node("Concat", "myConcat", inputs=[tensor4, tensor6], attrs={"axis": 0}, outputs=[tensor7])  # value=(A,3,B*5), shape=()
node7 = gs.Node("Reshape", "myReshape1", inputs=[tensor0, tensor7], outputs=[tensor8])  # shape=(A*3*B*5,)

graph = gs.Graph(nodes=[node0, node1, node2, node3, node4, node5, node6, node7], inputs=[tensor0], outputs=[tensor3, tensor8])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-07-01.onnx")

graph.inputs[0].shape = [2, 3, 4, 5]  # with a static shape, fold_constants can simplify the graph
graph.fold_constants().cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-07-02.onnx")
graphNodeList.append(node12)

tensor13 = gs.Variable("tensor-13", np.float32, None)
node13 = gs.Node("Add", "Add-13", inputs=[tensor12, constant10], outputs=[tensor13])
graphNodeList.append(node13)

tensor14 = gs.Variable("tensor-14", np.float32, None)
node14 = gs.Node("Softmax", "Softmax-14", inputs=[tensor13], outputs=[tensor14], attrs=OrderedDict([('axis', 1)]))
graphNodeList.append(node14)

tensor15 = gs.Variable("tensor-15", np.int64, None)
node15 = gs.Node("ArgMax", "ArgMax-15", inputs=[tensor14], outputs=[tensor15], attrs=OrderedDict([('axis', 1), ('keepdims', 0)]))
graphNodeList.append(node15)

graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor15])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), onnxFile)
print("Succeeded creating %s" % onnxFile)
def insert_decoder_loop(decoder_iter_onnx_path, output_dir, decoder_out_name, fp16):
    float_prec = np.float16 if fp16 else np.float32

    # Modify loop body so that it has 2+N inputs: (iteration_num, condition, loop carried dependencies...)
    # and 1+N+K outputs: (condition, loop carried dependencies..., scan_outputs...)

    # In this case, the loop carried dependencies include the following IN ORDER
    #    - decoder_output/decoder_input
    #    - attention_hidden
    #    - attention_cell
    #    - decoder_hidden
    #    - decoder_cell
    #    - attention_weights
    #    - attention_weights_cum
    #    - attention_context
    #    - not_finished (bool tensor, initialized to all True)
    #    - mel_lengths

    # The following are NOT loop carried dependencies (they remain constant through the loop),
    # and must be moved to be inputs outside of the loop body
    #    - memory
    #    - processed_memory
    #    - mask

    # The scan outputs are
    #    - mel_outputs (which scans across decoder_output)
    #    - gate_outputs (scans across gate_prediction)
    #    - alignments (scans across attention_weights)

    loop_body = gs.import_onnx(onnx.load(decoder_iter_onnx_path))
    loop_tensors = loop_body.tensors()

    iteration_num = gs.Variable("iteration_num", dtype=np.int64, shape=())
    cond_in = gs.Variable("cond_in", dtype=bool, shape=())
    cond_out = gs.Variable("cond_out", dtype=bool, shape=())
    not_finished_in = gs.Variable("not_finished_in", shape=('batch_size', 1), dtype=bool)
    not_finished_out = gs.Variable("not_finished_out", shape=('batch_size', 1), dtype=bool)
    mel_lengths_in = gs.Variable("mel_lengths_in", shape=('batch_size', 1), dtype=np.int32)
    mel_lengths_out = gs.Variable("mel_lengths_out", shape=('batch_size', 1), dtype=np.int32)

    # Set loop body inputs in the correct order
    loop_body.inputs = [
        iteration_num,
        cond_in,
        loop_tensors["decoder_input"],
        loop_tensors["attention_hidden"],
        loop_tensors["attention_cell"],
        loop_tensors["decoder_hidden"],
        loop_tensors["decoder_cell"],
        loop_tensors["attention_weights"],
        loop_tensors["attention_weights_cum"],
        loop_tensors["attention_context"],
        not_finished_in,
        mel_lengths_in,
    ]

    # Set loop body outputs in the correct order
    loop_body.outputs = [
        cond_out,
        loop_tensors["decoder_output"],
        loop_tensors["out_attention_hidden"],
        loop_tensors["out_attention_cell"],
        loop_tensors["out_decoder_hidden"],
        loop_tensors["out_decoder_cell"],
        loop_tensors["out_attention_weights"],
        loop_tensors["out_attention_weights_cum"],
        loop_tensors["out_attention_context"],
        not_finished_out,
        mel_lengths_out,
        loop_tensors["decoder_output"],
        loop_tensors["gate_prediction"],
        loop_tensors["out_attention_weights"],
    ]

    # The loop stop condition is given by the following lines in PyTorch:
    #     dec = torch.le(torch.sigmoid(decoder_outputs[8]), gate_threshold).to(torch.int32).squeeze(1)
    #     not_finished = not_finished * dec
    #     if torch.sum(not_finished) == 0:
    #         break
    # To compute cond_out, we can essentially follow the same steps.
    # Using Less instead of Greater+Not for now
    gate_threshold = gs.Constant("gate_threshold", np.array([0.5], dtype=float_prec))
    gate_sigmoid = gs.Variable("gate_sigmoid", dtype=float_prec, shape=())
    loop_body.nodes.append(
        gs.Node(op="Sigmoid", inputs=[loop_tensors["gate_prediction"]], outputs=[gate_sigmoid]))

    leq_output = gs.Variable("leq_output", dtype=bool)
    loop_body.nodes.append(
        gs.Node(op="Less", inputs=[gate_sigmoid, gate_threshold], outputs=[leq_output]))

    loop_body.nodes.append(
        gs.Node(op="And", inputs=[not_finished_in, leq_output], outputs=[not_finished_out]))

    cast_output = gs.Variable("cast_output", dtype=np.int32)
    loop_body.nodes.append(
        gs.Node(op="Cast", inputs=[not_finished_out], outputs=[cast_output], attrs={"to": 6}))  # 6 == int32

    reduce_output = gs.Variable("reduce_output", dtype=np.int32)
    loop_body.nodes.append(
        gs.Node(op="ReduceSum", inputs=[cast_output], outputs=[reduce_output], attrs={"axes": [0], "keepdims": 0}))

    unsqueezed_cond_out = gs.Variable("unsqueezed_cond_out", dtype=bool)
    loop_body.nodes.append(
        gs.Node(op="Equal",
                inputs=[reduce_output, gs.Constant("zero", np.array(0, dtype=np.int32))],
                outputs=[unsqueezed_cond_out]))

    squeezed_cond_out = gs.Variable("squeezed_cond_out", dtype=bool)
    loop_body.nodes.append(
        gs.Node(op="Squeeze", inputs=[unsqueezed_cond_out], outputs=[squeezed_cond_out], attrs={"axes": [0]}))

    loop_body.nodes.append(
        gs.Node(op="Not", inputs=[squeezed_cond_out], outputs=[cond_out]))

    # Compute mel_lengths
    # from PyTorch: mel_lengths += not_finished
    loop_body.nodes.append(
        gs.Node(op="Add", inputs=[mel_lengths_in, cast_output], outputs=[mel_lengths_out]))

    memory = gs.Variable("memory", dtype=float_prec, shape=('batch_size', 'seq_len', 512))
    processed_memory = gs.Variable("processed_memory", dtype=float_prec, shape=('batch_size', 'seq_len', 128))
    mask = gs.Variable("mask", dtype=bool, shape=('batch_size', 'seq_len'))

    loop_body.toposort()
    onnx.save(
        gs.export_onnx(loop_body),
        os.path.join(output_dir, "loop_body_{prec}.onnx".format(prec="fp16" if float_prec == np.float16 else "fp32")))

    # Create outer graph

    # Inputs to outer graph are the following (suffixed with _0 to signify initial states)
    #    - decoder_input_0
    #    - attention_hidden_0
    #    - attention_cell_0
    #    - decoder_hidden_0
    #    - decoder_cell_0
    #    - attention_weights_0
    #    - attention_weights_cum_0
    #    - attention_context_0
    #    - memory
    #    - processed_memory
    #    - mask

    # Outputs are the following
    #    - mel_outputs
    #    - mel_lengths

    # Note: alignments and gate_outputs are scan outputs, but don't seem to be used later in the PyTorch implementation.
    # For now, we will make them intermediate tensors that are not outputted.
    graph = gs.Graph()

    decoder_input_0 = gs.Variable("decoder_input_0", dtype=float_prec, shape=('batch_size', 80))
    attention_hidden_0 = gs.Variable("attention_hidden_0", dtype=float_prec, shape=('batch_size', 1024))
    attention_cell_0 = gs.Variable("attention_cell_0", dtype=float_prec, shape=('batch_size', 1024))
    decoder_hidden_0 = gs.Variable("decoder_hidden_0", dtype=float_prec, shape=('batch_size', 1024))
    decoder_cell_0 = gs.Variable("decoder_cell_0", dtype=float_prec, shape=('batch_size', 1024))
    attention_weights_0 = gs.Variable("attention_weights_0", dtype=float_prec, shape=('batch_size', 'seq_len'))
    attention_weights_cum_0 = gs.Variable("attention_weights_cum_0", dtype=float_prec, shape=('batch_size', 'seq_len'))
    attention_context_0 = gs.Variable("attention_context_0", dtype=float_prec, shape=('batch_size', 512))
    not_finished_0 = gs.Variable("not_finished_0", dtype=bool)
    mel_lengths_0 = gs.Variable("mel_lengths_0", dtype=np.int32)

    # For not_finished, we need to generate a tensor of shape (batch_size) that is all 1s
    # We can use the ONNX ConstantOfShape op to do this
    not_finished_shape = gs.Variable("not_finished_shape", dtype=np.int64)
    reduced = gs.Variable("reduced", dtype=float_prec)
    graph.nodes.append(
        gs.Node(op="ReduceSum", inputs=[decoder_input_0], outputs=[reduced], attrs={"axes": [1], "keepdims": 1}))
    graph.nodes.append(
        gs.Node(op="Shape", inputs=[reduced], outputs=[not_finished_shape]))
    before_cast = gs.Variable("before_cast", dtype=np.int32)
    graph.nodes.append(
        gs.Node(op="ConstantOfShape",
                inputs=[not_finished_shape],
                outputs=[before_cast],
                attrs={"value": gs.Constant("one", np.array([1], dtype=np.int32))}))
    graph.nodes.append(
        gs.Node(op="Cast", inputs=[before_cast], outputs=[not_finished_0], attrs={"to": 9}))  # 9 == bool

    # Same thing for mel_lengths, but we need all 0s
    graph.nodes.append(
        gs.Node(op="ConstantOfShape",
                inputs=[not_finished_shape],
                outputs=[mel_lengths_0],
                attrs={"value": gs.Constant("zero", np.array([0], dtype=np.int32))}))

    # Loop carried dependencies at the end of the loop
    decoder_input_t = gs.Variable("decoder_input_t", dtype=float_prec, shape=('batch_size', 80))
    attention_hidden_t = gs.Variable("attention_hidden_t", dtype=float_prec, shape=('batch_size', 1024))
    attention_cell_t = gs.Variable("attention_cell_t", dtype=float_prec, shape=('batch_size', 1024))
    decoder_hidden_t = gs.Variable("decoder_hidden_t", dtype=float_prec, shape=('batch_size', 1024))
    decoder_cell_t = gs.Variable("decoder_cell_t", dtype=float_prec, shape=('batch_size', 1024))
    attention_weights_t = gs.Variable("attention_weights_t", dtype=float_prec, shape=('batch_size', 'seq_len'))
    attention_weights_cum_t = gs.Variable("attention_weights_cum_t", dtype=float_prec, shape=('batch_size', 'seq_len'))
    attention_context_t = gs.Variable("attention_context_t", dtype=float_prec, shape=('batch_size', 512))
    not_finished_t = gs.Variable("not_finished_t", dtype=bool)
    mel_lengths_t = gs.Variable("mel_lengths_t", dtype=np.int32, shape=('batch_size', 1))

    # Scan outputs
    mel_outputs_raw = gs.Variable("mel_outputs_raw", dtype=float_prec, shape=(-1, 'batch_size', 80))
    gate_outputs = gs.Variable("gate_outputs", dtype=float_prec, shape=(-1, 'batch_size', 1))
    alignments = gs.Variable("alignments", dtype=float_prec, shape=(-1, 1, 'seq_len'))

    mel_outputs = gs.Variable("mel_outputs", dtype=float_prec, shape=('batch_size', 80, -1))

    graph.inputs = [
        decoder_input_0,
        attention_hidden_0,
        attention_cell_0,
        decoder_hidden_0,
        decoder_cell_0,
        attention_weights_0,
        attention_weights_cum_0,
        attention_context_0,
        memory,
        processed_memory,
        mask,
    ]

    graph.outputs = [mel_outputs, mel_lengths_t]

    # The trip count is an optional input in ONNX, but ONNX GraphSurgeon does not
    # support optional inputs. To work around this, after exporting the ONNX
    # ModelProto from GS, we replace this input with "".
    trip_count = gs.Constant("trip_count", np.array(0, dtype=np.int64))
    initial_cond = gs.Constant("initial_cond", np.array(True, dtype=bool))

    loop_inputs = [
        trip_count,
        initial_cond,
        decoder_input_0,
        attention_hidden_0,
        attention_cell_0,
        decoder_hidden_0,
        decoder_cell_0,
        attention_weights_0,
        attention_weights_cum_0,
        attention_context_0,
        not_finished_0,
        mel_lengths_0,
    ]
    loop_outputs = [
        decoder_input_t,
        attention_hidden_t,
        attention_cell_t,
        decoder_hidden_t,
        decoder_cell_t,
        attention_weights_t,
        attention_weights_cum_t,
        attention_context_t,
        not_finished_t,
        mel_lengths_t,
        mel_outputs_raw,
        gate_outputs,
        alignments,
    ]

    decoder_loop = gs.Node(op="Loop",
                           name="decoder_loop",
                           inputs=loop_inputs,
                           outputs=loop_outputs,
                           attrs={"body": loop_body})
    graph.nodes.append(decoder_loop)

    # Output needs to have the loop dimension as the innermost dim
    graph.nodes.append(
        gs.Node(op="Transpose", inputs=[mel_outputs_raw], outputs=[mel_outputs], attrs={"perm": [1, 2, 0]}))

    graph.toposort()
    exported_graph = gs.export_onnx(graph)
    # Remove the trip count input of the Loop node (see note above)
    [x for x in exported_graph.graph.node if x.name == "decoder_loop"][0].input[0] = ""
    onnx.save(exported_graph, os.path.join(output_dir, decoder_out_name))
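# A hypothetical invocation of the routine above (the paths and flags are
# assumptions for illustration; fp16=False produces the fp32 variant):
insert_decoder_loop("decoder_iter.onnx",
                    output_dir=".",
                    decoder_out_name="decoder_with_outer_loop.onnx",
                    fp16=False)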
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

# Inputs
x = gs.Variable(name="x", dtype=np.float32, shape=(1, 3, 224, 224))

# Intermediate tensors
i0 = gs.Variable(name="i0")
i1 = gs.Variable(name="i1")

# Outputs
y = gs.Variable(name="y", dtype=np.float32)

nodes = [
    gs.Node(op="Identity", inputs=[x], outputs=[i0]),
    gs.Node(op="FakeNodeToRemove", inputs=[i0], outputs=[i1]),
    gs.Node(op="Identity", inputs=[i1], outputs=[y]),
]

graph = gs.Graph(nodes=nodes, inputs=[x], outputs=[y])
onnx.save(gs.export_onnx(graph), "model.onnx")
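# This model exists to demonstrate node removal. A hedged sketch of how the
# fake node could be removed, using the standard GraphSurgeon pattern (reroute
# the consumers to the node's input, empty its outputs, then cleanup); the
# output file name is an assumption:
graph = gs.import_onnx(onnx.load("model.onnx"))
for node in graph.nodes:
    if node.op == "FakeNodeToRemove":
        inp_tensor = node.inputs[0]
        out_tensor = node.outputs[0]
        # Reconnect every consumer of this node's output to its input
        for consumer in list(out_tensor.outputs):
            index = consumer.inputs.index(out_tensor)
            consumer.inputs[index] = inp_tensor
        node.outputs = []  # makes the node dead so cleanup() drops it
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "removed.onnx")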
constant0 = gs.Constant(name="constant0", values=np.ones(shape=[1, 1, 1, 1], dtype=np.float32))

node0 = gs.Node(name="myIdentity0", op="Identity", inputs=[tensor0], outputs=[tensor1])
node1 = gs.Node(name="myAdd", op="Add", inputs=[tensor1, constant0], outputs=[tensor2])
node2 = gs.Node(name="myIdentity1", op="Identity", inputs=[tensor2], outputs=[tensor3])

graph = gs.Graph(nodes=[node0, node1, node2], inputs=[tensor0], outputs=[tensor3])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-03-01.onnx")

for node in graph.nodes:
    if node.op == 'Add' and node.name == 'myAdd':
        index = node.o().inputs.index(node.outputs[0])  # carefully locate the position of this tensor among the next node's inputs
        node.o().inputs[index] = node.inputs[0]  # reroute the next node's corresponding input to the Add node's own input tensor
        node.outputs = []  # the key step: empty the Add node's outputs so it becomes a dead node that can be removed automatically

graph.cleanup().toposort()  # cleanup automatically deletes the Add node
onnx.save(gs.export_onnx(graph), "model-03-02.onnx")
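# An optional follow-up check (a small sketch): confirm the Add node is gone
# from the saved model, leaving only the two Identity nodes.
m = onnx.load("model-03-02.onnx")
assert all(n.op_type != "Add" for n in m.graph.node)
print([n.op_type for n in m.graph.node])  # expect ['Identity', 'Identity']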
"ReLU-" + str(i), inputs=[tensor5], outputs=[tensor6]) graphNodeList.append(node6) tensorLoop = tensor6 tensor7 = gs.Variable("tensor-6", dtype=np.float32, shape=None) node7 = gs.Node("Conv", "Conv1", inputs=[tensorLoop, constant1x32], outputs=[tensor7]) graphNodeList.append(node7) graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[tensor7], opset=13) onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile0) print("Succeeded building %s!" % (onnxFile0)) # 修改 .onnx graph = gs.import_onnx(onnx.load(onnxFile0)) constant32r = gs.Constant( "constant32r", np.ascontiguousarray( np.random.rand(1, nC, 1, 1).reshape(1, nC, 1, 1).astype(np.float32) * 2 - 1)) for node in graph.nodes:
import onnx_graphsurgeon as gs  # needed for the gs.* calls below
import numpy as np
import onnx

# Computes outputs = input + ((a + b) + d)

shape = (1, 3)

# Inputs
input = gs.Variable("input", shape=shape, dtype=np.float32)

# Intermediate tensors
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
c = gs.Variable("c")
d = gs.Constant("d", values=np.ones(shape=shape, dtype=np.float32))
e = gs.Variable("e")

# Outputs
output = gs.Variable("output", shape=shape, dtype=np.float32)

nodes = [
    # c = (a + b)
    gs.Node("Add", inputs=[a, b], outputs=[c]),
    # e = (c + d)
    gs.Node("Add", inputs=[c, d], outputs=[e]),
    # output = input + e
    gs.Node("Add", inputs=[input, e], outputs=[output]),
]

graph = gs.Graph(nodes=nodes, inputs=[input], outputs=[output])
onnx.save(gs.export_onnx(graph), "model.onnx")
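# Since (a + b) + d involves only constants, it can be folded away. A short
# follow-up sketch showing the folding step (the output file name is assumed):
graph = gs.import_onnx(onnx.load("model.onnx"))
graph.fold_constants().cleanup()  # folds the two constant Adds into a single initializer
onnx.save(gs.export_onnx(graph), "folded.onnx")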
import numpy as np
import onnx
import onnx_graphsurgeon as gs

tensor0 = gs.Variable(name="tensor0", dtype=np.float32, shape=['B', 3, 64, 64])
tensor1 = gs.Variable(name="tensor1", dtype=np.float32, shape=None)
tensor2 = gs.Variable(name="tensor2", dtype=np.float32, shape=None)

node0 = gs.Node(name="myIdentity0", op="Identity", inputs=[tensor0], outputs=[tensor1])
node1 = gs.Node(name="myIdentity1", op="Identity", inputs=[tensor1], outputs=[tensor2])

graph = gs.Graph(nodes=[node0, node1], inputs=[tensor0], outputs=[tensor2])
graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-02-01.onnx")

for node in graph.nodes:
    if node.op == 'Identity' and node.name == 'myIdentity0':  # traverse the graph to find the place where the node should be inserted
        constant0 = gs.Constant(name="constant0", values=np.ones(shape=[1, 1, 1, 1], dtype=np.float32))  # construct the new node and its tensors
        tensor3 = gs.Variable(name="tensor3", dtype=np.float32, shape=None)
        newNode = gs.Node(name="myAdd", op="Add", inputs=[node.outputs[0], constant0], outputs=[tensor3])
        graph.nodes.append(newNode)  # remember to add the new node to the graph
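# The snippet appears truncated: after appending the new node, the downstream
# node must be rewired to consume the new tensor. A hedged completion (the
# output file name is an assumption):
        index = node.o().inputs.index(node.outputs[0])  # position of tensor1 among the next node's inputs
        node.o().inputs[index] = tensor3  # the next node now reads tensor3 instead of tensor1

graph.cleanup().toposort()
onnx.save(gs.export_onnx(graph), "model-02-02.onnx")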