Example #1
0
    def add_node(self, graph):
        """Append the NMS plugin node to ``graph`` and expose its outputs.

        The tensors named ``boxes`` and ``confs`` are wired into a new node
        whose op is ``self.plugin_name`` and whose attributes are
        ``self.attrs``. The four tensors produced by that node replace the
        graph outputs.

        Args:
            graph: Graph to modify in place. Its first input's shape is read
                at indices 0, 2 and 3 (assumed to be an NCHW layout -- TODO
                confirm against the caller).

        Returns:
            The value of ``graph.cleanup().toposort()``.

        Raises:
            KeyError: If the graph has no tensor named ``boxes`` or ``confs``.
        """
        print("Start adding node(op = %s)"%(self.plugin_name))
        first_input_shape = graph.inputs[0].shape
        batch_size = first_input_shape[0]
        input_h = first_input_shape[2]
        input_w = first_input_shape[3]
        # The arguments follow the order of the labels in the message; they
        # were previously passed as (height, width) under "width, height".
        print("width %d, height %d"%(input_w, input_h))
        print("target Plugin: %s"%(self.plugin_name))

        tensors = graph.tensors()
        boxes_tensor = tensors['boxes']
        confs_tensor = tensors['confs']

        # Output tensors of the plugin; every one is sized by the batch
        # dimension and, except for the detection count, by self.keepTopK.
        num_detections = gs.Variable(name="num_detections").to_variable(dtype=np.int32, shape=[batch_size, 1])
        nms_boxes = gs.Variable(name="nms_boxes").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK, 4])
        nms_scores = gs.Variable(name="nms_scores").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK])
        nms_classes = gs.Variable(name="nms_classes").to_variable(dtype=np.float32, shape=[batch_size, self.keepTopK])

        outputs = [num_detections, nms_boxes, nms_scores, nms_classes]

        nms_node = gs.Node(
            op=self.plugin_name,
            attrs=self.attrs,
            inputs=[boxes_tensor, confs_tensor],
            outputs=outputs)

        graph.nodes.append(nms_node)
        # The plugin outputs become the only graph outputs.
        graph.outputs = outputs
        print("ADD graph node surgery complete")
        return graph.cleanup().toposort()
def create_and_add_plugin_node(graph, topK, keepTopK):
    """Attach a ``BatchedNMS_TRT`` node fed by the ``boxes``/``confs`` tensors.

    The node's four outputs replace the graph outputs. Node attributes come
    from ``create_attrs``, which receives the values at indices 2 and 3 of
    the first graph input's shape together with ``topK`` and ``keepTopK``.

    Returns:
        The value of ``graph.cleanup().toposort()``.
    """
    first_input = graph.inputs[0]
    batch_size = first_input.shape[0]
    input_h = first_input.shape[2]
    input_w = first_input.shape[3]

    tensor_by_name = graph.tensors()
    plugin_inputs = [tensor_by_name["boxes"], tensor_by_name["confs"]]

    # (name, dtype, shape) of every tensor the plugin produces, in order.
    output_specs = [
        ("num_detections", np.int32, [batch_size, 1]),
        ("nmsed_boxes", np.float32, [batch_size, keepTopK, 4]),
        ("nmsed_scores", np.float32, [batch_size, keepTopK]),
        ("nmsed_classes", np.float32, [batch_size, keepTopK]),
    ]
    new_outputs = [
        gs.Variable(name=out_name).to_variable(dtype=out_dtype, shape=out_shape)
        for out_name, out_dtype, out_shape in output_specs
    ]

    plugin_attrs = create_attrs(input_h, input_w, topK, keepTopK)
    graph.nodes.append(
        gs.Node(op="BatchedNMS_TRT",
                attrs=plugin_attrs,
                inputs=plugin_inputs,
                outputs=new_outputs))
    graph.outputs = new_outputs

    return graph.cleanup().toposort()
Example #3
0
def make_multi_input_output():
    """Build a two-input, two-output graph and save it as ``reducable.onnx``.

    Each input passes through an identity, the two results are added, and
    the sum is forwarded through two separate identities that form the graph
    outputs.
    """
    dtype = np.float32
    shape = (1,)

    in_x = gs.Variable("X0", dtype=dtype, shape=shape)
    in_y = gs.Variable("Y0", dtype=dtype, shape=shape)
    graph = gs.Graph(inputs=[in_x, in_y])

    summed = graph.add(graph.identity(in_x), graph.identity(in_y))

    # Two independent identity branches hang off the same sum.
    branch_outputs = []
    for _ in range(2):
        branch = graph.identity(summed)
        branch.dtype = dtype
        branch.shape = shape
        branch_outputs.append(branch)

    graph.outputs = branch_outputs

    save(graph, "reducable.onnx")
Example #4
0
    def modeify_model2(cls, input_file="model.onnx", output_file="add.onnx"):
        """Rebuild the LeakyRelu -> Resize path of a model and save the result.

        Loads ``input_file``, detaches the first ``LeakyRelu`` node from its
        outputs and the first ``MaxPool`` node from its inputs, then inserts a
        new ``LeakyRelu`` (alpha 0.1) followed by a new nearest-mode ``Resize``
        (scales ``[1, 1, 2, 2]``) whose output becomes the ``MaxPool`` input.
        The graph is cleaned up, topologically sorted and written to
        ``output_file``.

        Args:
            input_file: Path of the ONNX model to load.
            output_file: Path the modified ONNX model is saved to.

        Raises:
            IndexError: If the graph has no ``LeakyRelu`` or no ``MaxPool`` node.
        """
        graph = gs.import_onnx(onnx.load(input_file))

        first_add = [node for node in graph.nodes
                     if node.op == "LeakyRelu"][0]  # first node whose op is LeakyRelu
        # first_add = [node for node in graph.nodes if node.name == "LeakyRelu_2"][0]  # look the LeakyRelu node up by name instead
        # first_add.inputs = [inp for inp in first_add.inputs]  # its inputs
        # first_add.outputs = [inp for inp in first_add.outputs]  # its outputs
        first_add.outputs.clear(
        )  # Required: clear() drops the links to the outputs, which leaves this LeakyRelu without any output, so a replacement producing a new output is created below
        # graph.nodes.remove(first_add) # would remove the whole node

        second_add = [node for node in graph.nodes if node.op == "MaxPool"][0]
        # second_add = [node for node in graph.nodes if node.name == "MaxPool_32"][0]
        second_add.inputs.clear()  # Required: clear() drops the links to the inputs; the input is re-assigned further down

        # Re-create the LeakyRelu layer, reading the same inputs as the original one
        attrs = {"alpha": 0.1}
        lrelu = gs.Variable("new_lrelu", np.float32)
        node = gs.Node(op="LeakyRelu",
                       inputs=first_add.inputs,
                       outputs=[lrelu],
                       attrs=attrs)
        graph.nodes.append(node)

        # Re-create the resize layer (implements the upsample)
        attrs = {
            "coordinate_transformation_mode": 'asymmetric',
            "mode": 'nearest',
            "nearest_mode": 'floor',
        }
        layer_name = "new_resize"  # must not collide with the name of the original resize node
        scales = np.array([1.0, 1.0, 2, 2]).astype(np.float32)
        scale_name = layer_name + ".scale"
        roi_name = layer_name + ".roi"
        scale = gs.Constant(scale_name, scales)
        roi = gs.Constant(roi_name, np.asarray([0, 0, 0, 0], np.float32))
        # inputs =first_add.outputs
        # Resize inputs, in order: data, roi, scales
        inputs = [lrelu]
        inputs.append(roi)
        inputs.append(scale)
        resize = gs.Variable(layer_name, dtype=np.float32)
        node = gs.Node(op="Resize",
                       inputs=inputs,
                       outputs=[resize],
                       attrs=attrs)
        graph.nodes.append(node)

        # Point the next layer (MaxPool) at the new resize output
        second_add.inputs = [resize]

        # 5. Remove unused nodes/tensors, and topologically sort the graph
        graph.cleanup().toposort()

        onnx.save(gs.export_onnx(graph), output_file)
0
def run(nM,nK,nN):
    """Build a MatMul/Add/Relu chain model, save it, and run it with trtexec.

    The graph starts with one MatMul from the ``[nM, 1]`` input, repeats a
    six-node block (MatMul, Add, Relu, MatMul, Add, Relu) ``nLoop`` times and
    finishes with a ReduceSum. The model is saved as
    ``model-<nM>-<nK>-<nN>.onnx`` and then passed to ``trtexec`` through
    ``os.system``.

    Args:
        nM: First dimension of the input tensor.
        nK: Inner width used by the first MatMul and the second half of a block.
        nN: Width used by the first half of a block.
    """
    def _random_constant(name, *dims):
        # float32 weights computed as rand * 2 - 1, stored contiguously.
        return gs.Constant(name, np.ascontiguousarray(np.random.rand(*dims).astype(np.float32) * 2 - 1))

    tensor0 = gs.Variable("tensor0", np.float32, [nM, 1])

    # Creation order is kept fixed so the random stream is consumed the same way.
    constant1xK = _random_constant("constant1xK", 1, nK)
    constantKxN = _random_constant("constantKxN", nK, nN)
    constantN = _random_constant("constantN", nN)
    constantNxK = _random_constant("constantNxK", nN, nK)
    constantK = _random_constant("constantK", nK)
    constantM1 = gs.Constant("constantM1", np.ascontiguousarray(np.array([-1], dtype=np.int64)))

    graphNodeList = []

    head_out = gs.Variable("tensor1", np.float32, None)
    graphNodeList.append(gs.Node("MatMul", "MMU1", inputs=[tensor0, constant1xK], outputs=[head_out]))

    # (op, node-name prefix, optional second input) for one repeated block.
    block_plan = (
        ("MatMul", "MMU-", constantKxN),
        ("Add", "AddU-", constantN),
        ("Relu", "ReLUU-", None),
        ("MatMul", "MMD-", constantNxK),
        ("Add", "AddD-", constantK),
        ("Relu", "ReLUD-", None),
    )

    current = head_out
    for i in range(nLoop):
        for step, (op, prefix, weight) in enumerate(block_plan, start=1):
            produced = gs.Variable("tensor%d-%d" % (i, step), dtype=np.float32, shape=None)
            node_inputs = [current] if weight is None else [current, weight]
            graphNodeList.append(gs.Node(op, prefix + str(i), inputs=node_inputs, outputs=[produced]))
            current = produced

    reduced = gs.Variable("tensor8", dtype=np.float32, shape=None)
    graphNodeList.append(
        gs.Node("ReduceSum", "Reduce", inputs=[current, constantM1], outputs=[reduced], attrs=OrderedDict([('keepdims', 0)])))

    graph = gs.Graph(nodes=graphNodeList, inputs=[tensor0], outputs=[reduced], opset=13)

    onnxFile = "model-%d-%d-%d.onnx"%(nM,nK,nN)
    exported = gs.export_onnx(graph.cleanup().toposort())
    onnx.save(exported, onnxFile)
    print("Succeeded building %s!" % (onnxFile))

    os.system("trtexec --onnx=%s --useCudaGraph --noDataTransfers --fp16"%onnxFile)
Example #6
0
def onnx_mul(nodes, layer_name, input_node1, input_node2, output_shape):
    """Append a ``Mul`` node to ``nodes`` and return its output tensor.

    The output is a float32 variable named ``layer_name`` with shape
    ``output_shape``; the node multiplies ``input_node1`` by ``input_node2``.
    """
    product = gs.Variable(layer_name, np.float32, output_shape)
    nodes.append(
        gs.Node(op="Mul",
                inputs=[input_node1, input_node2],
                outputs=[product]))
    return product
Example #7
0
def onnx_upsample(nodes,
                  layer_name,
                  input_node,
                  output_shape=None,
                  resize_scale_factors=2):
    """Append a nearest-mode ``Resize`` node to ``nodes`` and return its output.

    The scales are ``[1, 1, f, f]`` with ``f = resize_scale_factors``. The
    node's ``roi`` and ``scales`` inputs are constants named
    ``<layer_name>.roi`` and ``<layer_name>.scale``; ``layer_name`` should not
    collide with an existing resize node name.
    """
    factor = resize_scale_factors
    scale = gs.Constant(layer_name + ".scale",
                        np.array([1.0, 1.0, factor, factor]).astype(np.float32))
    roi = gs.Constant(layer_name + ".roi",
                      np.asarray([0, 0, 0, 0], np.float32))

    output_node = gs.Variable(layer_name, dtype=np.float32, shape=output_shape)
    resize_attrs = {
        "coordinate_transformation_mode": 'asymmetric',
        "mode": 'nearest',
        "nearest_mode": 'floor',
    }
    nodes.append(
        gs.Node(op="Resize",
                inputs=[input_node, roi, scale],
                outputs=[output_node],
                attrs=resize_attrs))

    return output_node
Example #8
0
def onnx_slice(nodes,
               layer_name,
               input_node,
               output_shape,
               start=(0, 0, 0, 0),
               shape=(2, 2, 3, 3),
               stride=(1, 1, 1, 1)):
    """Append a ``Slice`` node to ``nodes`` and return its output tensor.

    Four int32 constants are passed after ``input_node``, in this order:
    ``start``, ``shape``, an axis index for every entry of ``start``
    (``0 .. len(start) - 1``) and ``stride``. ``shape`` sits in the position
    the ONNX Slice operator reads as the end indices.

    Example, equivalent to ``x[:, 2:4]`` for ``x = torch.randn([8, 8])``:

        onnx_slice(nodes, "slice", x, (8, 2), (0, 2), (8, 4), (1, 1))
    """
    def _int32_constant(suffix, values):
        return gs.Constant(layer_name + suffix, np.asarray(values, np.int32))

    axes = gs.Constant(layer_name + '_constant_axis',
                       np.arange(0, len(start)).astype(np.int32))
    slice_inputs = [
        input_node,
        _int32_constant('_constant_start', start),
        _int32_constant('_constant_shape', shape),
        axes,
        _int32_constant('_constant_stride', stride),
    ]

    output_node = gs.Variable(layer_name, np.float32, output_shape)
    nodes.append(gs.Node(op="Slice", inputs=slice_inputs, outputs=[output_node]))

    return output_node
Example #9
0
    def add_model(cls, input_file="model.onnx", output_file="add.onnx"):
        """Insert a LeakyRelu node in front of the last Sigmoid node.

        Loads ``input_file``, creates a ``LeakyRelu`` (alpha 0.02) that reads
        the current inputs of the last ``Sigmoid`` node, and makes the
        LeakyRelu output the Sigmoid's only input. The graph is cleaned up,
        topologically sorted and written to ``output_file``.

        Args:
            input_file: Path of the ONNX model to load.
            output_file: Path the modified ONNX model is saved to.

        Raises:
            IndexError: If the graph contains no ``Sigmoid`` node.
        """
        graph = gs.import_onnx(onnx.load(input_file))

        first_add = [node for node in graph.nodes
                     if node.op == "Sigmoid"][-1]  # the last node whose op is Sigmoid
        # first_add.inputs = [inp for inp in first_add.inputs if inp.name == "fc"]  # keep only the input named "fc"
        # first_add.inputs = [inp for inp in first_add.inputs]  # its inputs
        # first_add.inputs = [inp for inp in first_add.inputs if inp.name != "b"] # its inputs, without the one named "b"

        # Create a LeakyRelu node that consumes the Sigmoid's current inputs
        lrelu = gs.Variable('new_lrelu', dtype=np.float32)
        graph.nodes.append(
            gs.Node(op="LeakyRelu",
                    inputs=first_add.inputs,
                    outputs=[lrelu],
                    attrs={"alpha": 0.02}))

        # From here on the Sigmoid's input is the LeakyRelu output.
        # NOTE(review): this relies on gs.Node keeping its own copy of the
        # input list, so the clear() below does not empty the new node's
        # inputs -- confirm against the ONNX-GraphSurgeon version in use.
        first_add.inputs.clear()
        first_add.inputs = [lrelu]

        # 5. Remove unused nodes/tensors, and topologically sort the graph
        graph.cleanup().toposort()

        onnx.save(gs.export_onnx(graph), output_file)
Example #10
0
 def get_tensor(name):
     """Return the tensor registered under ``name``, creating it if needed.

     A missing name is logged at verbose level and a new ``gs.Variable`` is
     stored in ``tensor_map`` before it is returned.
     """
     if name in tensor_map:
         return tensor_map[name]

     message = "Tensor: {:} does not exist in the model. Creating a new tensor".format(name)
     G_LOGGER.verbose(message)
     tensor_map[name] = gs.Variable(name)
     return tensor_map[name]
Example #11
0
def onnx_exp(nodes, layer_name, input_node, output_shape):
    """Append an ``Exp`` node to ``nodes`` and return its output tensor.

    The output is a float32 variable named ``layer_name`` with shape
    ``output_shape``.
    """
    result = gs.Variable(layer_name, np.float32, output_shape)
    nodes.append(gs.Node(op="Exp", inputs=[input_node], outputs=[result]))
    return result
Example #12
0
def onnx_reshape(nodes, layer_name, input_node, output_shape, value):
    """Append a ``Reshape`` node to ``nodes`` and return its output tensor.

    ``value`` is stored as an int64 constant named ``<layer_name>_constant``
    and passed as the node's second input. The output is a float32 variable
    named ``layer_name`` with shape ``output_shape``.
    """
    target_shape = gs.Constant(layer_name + '_constant',
                               np.asarray(value, np.int64))
    reshaped = gs.Variable(layer_name, np.float32, output_shape)
    nodes.append(
        gs.Node(op="Reshape",
                inputs=[input_node, target_shape],
                outputs=[reshaped]))
    return reshaped
Example #13
0
    def run(self, args):
        """Insert a new node between the tensors named in ``args``.

        The graph is imported through the parent class, the tensors listed in
        ``args.inputs`` and ``args.outputs`` are detached from their current
        consumers/producers, and a node with op ``args.op`` and name
        ``args.name`` is added that reads the former and writes the latter.
        The result is exported through the parent class with type checks
        disabled.

        Args:
            args: Parsed arguments; this method reads ``inputs``, ``outputs``,
                ``op`` and ``name`` and forwards ``args`` to the parent's
                ``import_graph`` / ``export_graph``.
        """
        _, graph = super().import_graph(args)

        # Name -> tensor lookup for the graph as imported.
        TENSOR_MAP = graph.tensors()

        def get_tensor(name):
            # NOTE(review): presumably G_LOGGER.critical aborts; if it only
            # logs, the lookup below raises KeyError for unknown names.
            if name not in TENSOR_MAP:
                G_LOGGER.critical(
                    "Tensor: {:} does not exist in the model.".format(name))
            return TENSOR_MAP[name]

        # We populate outputs first because we may need to update output nodes from the
        # input tensors if output == input.
        output_tensors = []
        for name in args.outputs:
            if name in args.inputs:
                # The same name is used as input and output of the new node, so
                # a fresh tensor is created to carry the new node's output.
                tensor = gs.Variable(
                    name="{:}_polygraphy_surgeon_insert_output".format(name))

                # Bind outputs to outputs of original inputs.
                # This construct is required to preserve ordering of the input tensors in the output nodes.
                for out in get_tensor(name).outputs:
                    for index, inp in enumerate(out.inputs):
                        if inp.name == name:
                            out.inputs[index] = tensor

                G_LOGGER.verbose(
                    "Generating new tensor for output: {:}".format(tensor))
            else:
                tensor = get_tensor(name)
            # Drop the tensor's current producers; the new node becomes its producer.
            tensor.inputs.clear()
            output_tensors.append(tensor)

            # A tensor without consumers replaces the graph output that has the same name.
            if not tensor.outputs:
                for index, out in enumerate(graph.outputs):
                    if out.name == name:
                        graph.outputs[index] = tensor

        input_tensors = []
        for name in args.inputs:
            tensor = get_tensor(name)
            # Drop the tensor's current consumers; the new node becomes its consumer.
            tensor.outputs.clear()
            input_tensors.append(tensor)

        new_node = gs.Node(op=args.op,
                           name=args.name,
                           inputs=input_tensors,
                           outputs=output_tensors)
        G_LOGGER.verbose("Generated new node: {:}".format(new_node))

        graph.nodes.append(new_node)

        # Since new graph outputs may be added, and we don't know the types, we skip type checks in ONNX-GraphSurgeon.
        super().export_graph(graph, args, do_type_check=False)
Example #14
0
def onnx_concat(nodes, layer_name, input_node=None, output_shape=(), axis=1):
    """Append a ``Concat`` node to ``nodes`` and return its output tensor.

    Args:
        nodes: List the new node is appended to.
        layer_name: Name of the output tensor.
        input_node: Iterable of tensors to concatenate. ``None`` (the
            default) means no inputs.
        output_shape: Shape recorded on the float32 output tensor.
        axis: Value of the node's ``axis`` attribute.

    Returns:
        The output tensor of the new node.
    """
    # A None default replaces the former mutable `[]` default, and the
    # caller's sequence is copied so it is never shared with the node.
    inputs = [] if input_node is None else list(input_node)
    attrs = {"axis": axis}
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Concat",
                   inputs=inputs,
                   outputs=[output_node],
                   attrs=attrs)

    nodes.append(node)

    return output_node
Example #15
0
def create_and_add_plugin_node(graph, args):
    """Attach a ``BatchedNMSDynamic_TRT`` node fed by ``boxes``/``confs``.

    The node's four outputs replace the graph outputs. ``args`` supplies
    ``keepTopK``, ``nbCls``, ``topK``, ``score`` and ``iou``, which are
    converted to int/float before being stored as node attributes.

    Returns:
        The value of ``graph.cleanup().toposort()``.
    """
    batch_size = graph.inputs[0].shape[0]
    tensor_by_name = graph.tensors()
    boxes_tensor = tensor_by_name["boxes"]
    confs_tensor = tensor_by_name["confs"]
    keepTopK = int(args.keepTopK)

    # (name, dtype, shape) of every tensor the plugin produces, in order.
    output_specs = (
        ("num_detections", np.int32, [batch_size, 1]),
        ("nmsed_boxes", np.float32, [batch_size, keepTopK, 4]),
        ("nmsed_scores", np.float32, [batch_size, keepTopK]),
        ("nmsed_classes", np.float32, [batch_size, keepTopK]),
    )
    new_outputs = [
        gs.Variable(name=out_name).to_variable(dtype=out_dtype, shape=out_shape)
        for out_name, out_dtype, out_shape in output_specs
    ]

    plugin_attrs = {
        "shareLocation": 1,
        "backgroundLabelId": -1,
        "numClasses": int(args.nbCls),
        "topK": int(args.topK),
        "keepTopK": keepTopK,
        "scoreThreshold": float(args.score),
        "iouThreshold": float(args.iou),
        "isNormalized": 1,
        "clipBoxes": 1,
        "plugin_version": "1",
    }
    plugin_node = gs.Node(
        op="BatchedNMSDynamic_TRT",
        attrs=plugin_attrs,
        inputs=[boxes_tensor, confs_tensor],
        outputs=new_outputs,
    )

    graph.nodes.append(plugin_node)
    graph.outputs = new_outputs

    return graph.cleanup().toposort()
Example #16
0
def append_nms(graph, num_classes, scoreThreshold, iouThreshold, keepTopK):
    """Append a ``BatchedNMSDynamic_TRT`` node consuming the graph outputs.

    The current graph outputs become the inputs of the new node, and the
    node's four outputs become the new graph outputs. The batch size is read
    from the first dimension of the first current output.

    Returns:
        The same ``graph`` object, modified in place.
    """
    plugin_inputs = graph.outputs
    bs = plugin_inputs[0].shape[0]

    def _plugin_output(name, dtype, *trailing_dims):
        # Every plugin output has the batch size as its leading dimension.
        return gs.Variable(name=name, dtype=dtype, shape=(bs, *trailing_dims))

    detections = [
        _plugin_output("nms_num_detections", np.int32, 1),
        _plugin_output("nms_boxes", np.float32, keepTopK, 4),
        _plugin_output("nms_scores", np.float32, keepTopK),
        _plugin_output("nms_classes", np.float32, keepTopK),
    ]

    plugin_attrs = {
        'shareLocation': True,
        'backgroundLabelId': -1,
        'numClasses': num_classes,
        'topK': 1024,
        'keepTopK': keepTopK,
        'scoreThreshold': scoreThreshold,
        'iouThreshold': iouThreshold,
        'isNormalized': True,
        'clipBoxes': True
    }

    graph.nodes.append(
        gs.Node(op="BatchedNMSDynamic_TRT",
                attrs=plugin_attrs,
                inputs=plugin_inputs,
                outputs=detections))
    # A separate list object is assigned, as the node holds the first one.
    graph.outputs = list(detections)

    return graph
Example #17
0
    def test_with_nested_graph(self):
        """Constant folding must also fold constants inside ``If`` subgraphs.

        The ``Then`` branch (``Y + Y``) consists only of constants and is
        expected to fold away completely. The ``Else`` branch
        (``X + (Y + Y)``) depends on the variable ``X``, so only its inner
        add is expected to fold into a constant.
        """
        # np.bool_ is used instead of np.bool: the bare alias was deprecated
        # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        cond = gs.Variable("cond", dtype=np.bool_, shape=(1, ))

        X = gs.Variable("X", dtype=np.float32, shape=(1, ))
        Y = gs.Constant("Y", values=np.ones((1, ), dtype=np.float32))
        graph = Graph(inputs=[X, cond])

        then_graph = Graph(name="Then")
        then_graph.outputs = [then_graph.add(Y, Y)]

        else_graph = Graph(name="Else")
        else_graph.outputs = [else_graph.add(X, else_graph.add(Y, Y))]

        graph.outputs = [graph.if_op(cond, then_graph, else_graph)]

        graph.fold_constants()
        graph.cleanup()

        # Then branch: fully folded, so its output holds the value Y * 2.
        assert len(then_graph.nodes) == 0
        assert np.all(then_graph.outputs[0].values == (Y.values * 2))

        # Else branch: only the outer add remains, reading a folded constant.
        assert len(else_graph.nodes) == 1
        assert isinstance(else_graph.nodes[0].inputs[1], Constant)
        assert np.all(else_graph.nodes[0].inputs[1].values == (Y.values * 2))
Example #18
0
def onnx_transpose(nodes,
                   layer_name,
                   input_node,
                   output_shape,
                   perm=(0, 1, 2, 3)):
    """Append a ``Transpose`` node to ``nodes`` and return its output tensor.

    Args:
        nodes: List the new node is appended to.
        layer_name: Name of the output tensor.
        input_node: Tensor to transpose.
        output_shape: Shape recorded on the float32 output tensor.
        perm: Axis permutation stored in the node's ``perm`` attribute.

    Returns:
        The output tensor of the new node.
    """
    # Each node gets its own list. The former mutable default list was stored
    # directly in the attributes, so nodes built with the default shared one
    # object and a change to one node's ``perm`` affected all of them.
    attrs = {"perm": list(perm)}
    inputs = [input_node]
    output_node = gs.Variable(layer_name, np.float32, output_shape)
    node = gs.Node(op="Transpose",
                   inputs=inputs,
                   outputs=[output_node],
                   attrs=attrs)

    nodes.append(node)

    return output_node
Example #19
0
    def test_const_inp_but_non_foldable_nested_graph(self):
        """An ``If`` with a constant condition must survive constant folding
        when its subgraphs read variables from the outer graph."""
        X = gs.Variable("X", dtype=np.float32, shape=(1, ))
        cond = gs.Constant("cond", values=np.array(True))

        graph = Graph(inputs=[X])

        then_graph = Graph(name="Then")
        then_sum = then_graph.add(X, X)
        then_graph.outputs = [then_sum]

        else_graph = Graph(name="Else")
        else_inner = else_graph.add(X, X)
        else_graph.outputs = [else_graph.add(X, else_inner)]

        # All direct inputs of the if_op are constant, which makes it look
        # foldable; it is not, because both branches use the outer-scope X.
        if_output = graph.if_op(cond, then_graph, else_graph)
        graph.outputs = [if_output]

        # With error_ok=False a folding failure would raise here; the `If`
        # node is expected to be left out of constant folding instead.
        folded = graph.fold_constants(error_ok=False)
        folded.cleanup()

        assert graph.nodes[0].op == "If"
        assert len(then_graph.nodes) == 1
        assert len(else_graph.nodes) == 2
Example #20
0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

# Computes outputs = input + ((a + b) + d)

# Every tensor with an explicit shape in this script uses this one.
shape = (1, 3)
# Inputs
# NOTE(review): `input` shadows the Python builtin of the same name.
input = gs.Variable("input", shape=shape, dtype=np.float32)

# Intermediate tensors
# a, b and d are constants filled with ones; c and e are variables created
# without a dtype or shape.
a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
c = gs.Variable("c")
d = gs.Constant("d", values=np.ones(shape=shape, dtype=np.float32))
e = gs.Variable("e")

# Outputs
output = gs.Variable("output", shape=shape, dtype=np.float32)

nodes = [
    # c = (a + b)
    gs.Node("Add", inputs=[a, b], outputs=[c]),
    # e = (c + d)
#

from collections import OrderedDict
import numpy as np
import onnx
import onnx_graphsurgeon as gs
import os
import tensorrt as trt

# Script-level configuration.
nLoop = 10
nC = 32  # leading dimension(s) of the random weight constants below
onnxFile0 = "model-0.onnx"
onnxFile1 = "model-1.onnx"

# Graph input; the first dimension is the symbolic name 'B'.
tensor0 = gs.Variable(name="tensor-0",
                      dtype=np.float32,
                      shape=['B', 1, 16, 16])

# Random float32 weights computed as rand * 2 - 1, stored contiguously.
# Shape (nC, 1, 3, 3).
constant32x1 = gs.Constant(
    "constant32x1",
    np.ascontiguousarray(
        np.random.rand(nC, 1, 3, 3).reshape(nC, 1, 3, 3).astype(np.float32) *
        2 - 1))
# Shape (nC, nC, 3, 3).
constant32x32 = gs.Constant(
    "constant32x32",
    np.ascontiguousarray(
        np.random.rand(nC, nC, 3, 3).reshape(nC, nC, 3, 3).astype(np.float32) *
        2 - 1))
constant32 = gs.Constant(
    "constant32",
    np.ascontiguousarray(
Example #22
0
    def update_nms(self, threshold=None, detections=None):
        """
        Updates the graph to replace the NMS op by BatchedNMS_TRT TensorRT plugin node.

        When ``self.legacy_plugins`` is false, an ``EfficientNMS_TRT`` node is created instead of
        ``BatchedNMS_TRT``. In both cases the four plugin outputs (``num_detections``, ``detection_boxes``,
        ``detection_scores``, ``detection_classes``) become the graph outputs, and ``self.infer()`` is called
        before and after the surgery.

        :param threshold: Override the score threshold attribute. If set to None, use the value in the graph.
        :param detections: Override the max detections attribute. If set to None, use the value in the graph.
        """
        def find_head_concat(name_scope):
            # This will find the concatenation node at the end of either Class Net or Box Net. These concatenation nodes
            # bring together prediction data for each of 5 scales.
            # The concatenated Class Net node will have shape [batch_size, num_anchors, num_classes],
            # and the concatenated Box Net node has the shape [batch_size, num_anchors, 4].
            # These concatenation nodes can be found by searching for all Concat's and checking if the node two
            # steps above in the graph has a name that begins with either "box_net/..." or "class_net/...".
            # Only the first matching Transpose node is examined, because the loop body returns.
            # NOTE(review): when no Transpose node matches, the function falls through and returns None.
            for node in [
                    node for node in self.graph.nodes
                    if node.op == "Transpose" and name_scope in node.name
            ]:
                concat = self.graph.find_descendant_by_op(node, "Concat")
                assert concat and len(concat.inputs) == 5
                log.info("Found {} node '{}' as the tip of {}".format(
                    concat.op, concat.name, name_scope))
                return concat

        def extract_anchors_tensor(split):
            # This will find the anchors that have been hardcoded somewhere within the ONNX graph.
            # The function will return a gs.Constant that can be directly used as an input to the NMS plugin.
            # The anchor tensor shape will be [1, num_anchors, 4]. Note that '1' is kept as first dim, regardless of
            # batch size, as it's not necessary to replicate the anchors for all images in the batch.

            # The anchors are available (one per coordinate) hardcoded as constants within certain box decoder nodes.
            # Each of these four constants have shape [1, num_anchors], so some numpy operations are used to expand the
            # dims and concatenate them as needed.

            # These constants can be found by starting from the Box Net's split operation , and for each coordinate,
            # walking down in the graph until either an Add or Mul node is found. The second input on this nodes will
            # be the anchor data required.
            def get_anchor_np(output_idx, op):
                # Returns the second input of the first `op` node found below the given split output,
                # flattened and expanded with new axes at positions 0 and 2.
                node = self.graph.find_descendant_by_op(
                    split.o(0, output_idx), op)
                assert node
                val = np.squeeze(node.inputs[1].values)
                return np.expand_dims(val.flatten(), axis=(0, 2))

            anchors_y = get_anchor_np(0, "Add")
            anchors_x = get_anchor_np(1, "Add")
            anchors_h = get_anchor_np(2, "Mul")
            anchors_w = get_anchor_np(3, "Mul")
            anchors = np.concatenate(
                [anchors_y, anchors_x, anchors_h, anchors_w], axis=2)
            return gs.Constant(name="nms/anchors:0", values=anchors)

        self.infer()

        # Name fragments that identify the class head (index 0) and the box head (index 1).
        # NOTE(review): for any other self.api value the list stays empty and head_names[0] below raises
        # IndexError.
        head_names = []
        if self.api == "AutoML":
            head_names = ["class_net/", "box_net/"]
        if self.api == "TFOD":
            head_names = [
                "/WeightSharedConvolutionalClassHead/",
                "/WeightSharedConvolutionalBoxHead/"
            ]

        # There are five nodes at the bottom of the graph that provide important connection points:

        # 1. Find the concat node at the end of the class net (multi-scale class predictor)
        class_net = find_head_concat(head_names[0])
        class_net_tensor = class_net.outputs[0]

        # 2. Find the concat node at the end of the box net (multi-scale localization predictor)
        box_net = find_head_concat(head_names[1])
        box_net_tensor = box_net.outputs[0]

        # 3. Find the split node that separates the box net coordinates and feeds them into the box decoder.
        box_net_split = self.graph.find_descendant_by_op(box_net, "Split")
        assert box_net_split and len(box_net_split.outputs) == 4

        # 4. Find the concat node at the end of the box decoder.
        box_decoder = self.graph.find_descendant_by_op(box_net_split, "Concat")
        assert box_decoder and len(box_decoder.inputs) == 4
        box_decoder_tensor = box_decoder.outputs[0]

        # 5. Find the NMS node.
        # NOTE(review): the result is used below without a check that a node was found.
        nms_node = self.graph.find_node_by_op("NonMaxSuppression")

        # Extract NMS Configuration
        # Inputs 2, 3 and 4 of the NMS node supply max detections, IoU threshold and score threshold.
        num_detections = int(
            nms_node.inputs[2].values) if detections is None else detections
        iou_threshold = float(nms_node.inputs[3].values)
        score_threshold = float(
            nms_node.inputs[4].values) if threshold is None else threshold
        # The class count is the last value of the second input of the node feeding the class-net concat.
        num_classes = class_net.i().inputs[1].values[-1]
        normalized = True if self.api == "TFOD" else False

        # NMS Inputs and Attributes
        # NMS expects these shapes for its input tensors:
        # box_net: [batch_size, number_boxes, 4]
        # class_net: [batch_size, number_boxes, number_classes]
        # anchors: [1, number_boxes, 4] (if used)
        nms_op = None
        nms_attrs = None
        nms_inputs = None
        if not self.legacy_plugins:
            # EfficientNMS TensorRT Plugin
            # Fusing the decoder will always be faster, so this is the default NMS method supported. In this case,
            # three inputs are given to the NMS TensorRT node:
            # - The box predictions (from the Box Net node found above)
            # - The class predictions (from the Class Net node found above)
            # - The default anchor coordinates (from the extracted anchor constants)
            # As the original tensors from EfficientDet will be used, the NMS code type is set to 1 (Center+Size),
            # because this is the internal box coding format used by the network.
            anchors_tensor = extract_anchors_tensor(box_net_split)
            nms_inputs = [box_net_tensor, class_net_tensor, anchors_tensor]
            nms_op = "EfficientNMS_TRT"
            nms_attrs = {
                'plugin_version': "1",
                'background_class': -1,
                'max_output_boxes': num_detections,
                'score_threshold':
                max(0.01, score_threshold
                    ),  # Keep threshold to at least 0.01 for better efficiency
                'iou_threshold': iou_threshold,
                'score_activation': True,
                'box_coding': 1,
            }
            nms_output_classes_dtype = np.int32
        else:
            # BatchedNMS TensorRT Plugin
            # Alternatively, the ONNX box decoder can be used. This will be slower, as more element-wise and non-fused
            # operations will need to be performed by TensorRT. However, it's easier to implement, so it is shown here
            # for reference. In this case, only two inputs are given to the NMS TensorRT node:
            # - The box predictions (already decoded through the ONNX Box Decoder node)
            # - The class predictions (from the Class Net node found above, but also needs to pass through a sigmoid)
            # This time, the box predictions will have the coordinate coding from the ONNX box decoder, which matches
            # what the BatchedNMS plugin uses.

            if self.api == "AutoML":
                # The default boxes tensor has shape [batch_size, number_boxes, 4]. This will insert a "1" dimension
                # in the second axis, to become [batch_size, number_boxes, 1, 4], the shape that BatchedNMS expects.
                box_decoder_tensor = self.graph.unsqueeze(
                    "nms/box_net_reshape", box_decoder_tensor, axes=[2])[0]
            if self.api == "TFOD":
                # The default boxes tensor has shape [4, number_boxes]. This will transpose and insert a "1" dimension
                # in the 0 and 2 axes, to become [1, number_boxes, 1, 4], the shape that BatchedNMS expects.
                box_decoder_tensor = self.graph.transpose(
                    "nms/box_decoder_transpose",
                    box_decoder_tensor,
                    perm=[1, 0])
                box_decoder_tensor = self.graph.unsqueeze(
                    "nms/box_decoder_reshape", box_decoder_tensor, axes=[0,
                                                                         2])[0]

            # BatchedNMS also expects the classes tensor to be already activated, in the case of EfficientDet, this is
            # through a Sigmoid op.
            class_net_tensor = self.graph.sigmoid("nms/class_net_sigmoid",
                                                  class_net_tensor)[0]

            nms_inputs = [box_decoder_tensor, class_net_tensor]
            nms_op = "BatchedNMS_TRT"
            nms_attrs = {
                'plugin_version': "1",
                'shareLocation': True,
                'backgroundLabelId': -1,
                'numClasses': num_classes,
                'topK': 1024,
                'keepTopK': num_detections,
                'scoreThreshold': score_threshold,
                'iouThreshold': iou_threshold,
                'isNormalized': normalized,
                'clipBoxes': False,
                # 'scoreBits': 10, # Some versions of the plugin may need this parameter. If so, uncomment this line.
            }
            nms_output_classes_dtype = np.float32

        # NMS Outputs
        # The class output dtype depends on the plugin chosen above (int32 for EfficientNMS_TRT, float32 for
        # BatchedNMS_TRT).
        nms_output_num_detections = gs.Variable(name="num_detections",
                                                dtype=np.int32,
                                                shape=[self.batch_size, 1])
        nms_output_boxes = gs.Variable(
            name="detection_boxes",
            dtype=np.float32,
            shape=[self.batch_size, num_detections, 4])
        nms_output_scores = gs.Variable(
            name="detection_scores",
            dtype=np.float32,
            shape=[self.batch_size, num_detections])
        nms_output_classes = gs.Variable(
            name="detection_classes",
            dtype=nms_output_classes_dtype,
            shape=[self.batch_size, num_detections])

        nms_outputs = [
            nms_output_num_detections, nms_output_boxes, nms_output_scores,
            nms_output_classes
        ]

        # Create the NMS Plugin node with the selected inputs. The outputs of the node will also become the final
        # outputs of the graph.
        self.graph.plugin(op=nms_op,
                          name="nms/non_maximum_suppression",
                          inputs=nms_inputs,
                          outputs=nms_outputs,
                          attrs=nms_attrs)
        log.info("Created NMS plugin '{}' with attributes: {}".format(
            nms_op, nms_attrs))

        self.graph.outputs = nms_outputs

        self.infer()
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from collections import OrderedDict
import numpy as np
import onnx
import onnx_graphsurgeon as gs

# Define tensors (variables).
tensor0 = gs.Variable("tensor0", dtype=np.float32, shape=['B', 3, 64, 64])
tensor1 = gs.Variable("tensor1", dtype=np.float32, shape=['B', 1, 64, 64])
# The shape or the data type of a variable may be left unknown.
tensor2 = gs.Variable("tensor2", dtype=np.float32, shape=None)
tensor3 = gs.Variable("tensor3", dtype=np.float32, shape=None)

# Define tensors (constants).
constant0 = gs.Constant(
    "constant0",
    values=np.ones([1, 3, 3, 3], dtype=np.float32),
)
constant1 = gs.Constant(
    "constant1",
    values=np.ones([1], dtype=np.float32),
)

# Define a node, using tensors as its inputs and outputs.
node0 = gs.Node(
    op="Conv",
    name="myConv",
    inputs=[tensor0, constant0],
    outputs=[tensor1],
)
Example #24
0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

# Tensors for a graph that computes Y = x0 + (a * x1 + b)

# Shape shared by every tensor below that declares one.
shape = (1, 3, 224, 224)

# Inputs
x0 = gs.Variable("x0", dtype=np.float32, shape=shape)
x1 = gs.Variable("x1", dtype=np.float32, shape=shape)

# Constant operands (each one gets its own array of ones)
a = gs.Constant(name="a", values=np.ones(shape, dtype=np.float32))
b = gs.Constant(name="b", values=np.ones(shape, dtype=np.float32))

# Intermediate tensors, declared by name only (no dtype or shape given)
mul_out = gs.Variable("mul_out")
add_out = gs.Variable("add_out")

# Outputs
Y = gs.Variable("Y", dtype=np.float32, shape=shape)

nodes = [
    # mul_out = a * x1
    gs.Node(op="Mul", inputs=[a, x1], outputs=[mul_out]),
    # add_out = mul_out + b
Example #25
0
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import onnx_graphsurgeon as gs
import numpy as np
import onnx

X = gs.Variable("X", dtype=np.float32, shape=(1, 3, 224, 224))
# W is a Constant, so it is exported as an initializer automatically
W = gs.Constant("W", values=np.ones((5, 3, 3, 3), dtype=np.float32))

Y = gs.Variable("Y", dtype=np.float32, shape=(1, 5, 222, 222))

node = gs.Node(op="Conv", inputs=[X, W], outputs=[Y])

# Initializers do not necessarily have to be listed as graph inputs
graph = gs.Graph(nodes=[node], inputs=[X], outputs=[Y])
model = gs.export_onnx(graph)
onnx.save(model, "test_conv.onnx")
# Rewrite the sub-graphs hanging off node 'Slice_74': for each selected
# consumer, fold the Slice's table and the consumer's second input into one
# precomputed constant and slice that constant directly. Every previous graph
# output is dropped; the outputs of the new Slice nodes become the graph
# outputs. `wiliConstant0`, `wiliConstant1` and `wiliConstant3` are defined
# outside this excerpt.

# Running counter that gives each generated constant / node a unique name.
nSlice = 0
graph.outputs = []
# NOTE(review): graph.nodes is appended to while it is being iterated. The
# appended nodes are named 'wiliSliceN-*', so they never match the test below.
for node in graph.nodes:
    if node.op == 'Slice' and node.name == 'Slice_74':
        # First element of the values of the Slice's first input; the name
        # suggests a 512x256 table -- TODO confirm the actual shape.
        table512x256 = node.inputs[0].values[0]
        # Odd indices 1, 3, ..., 23 (12 iterations).
        for i in range(1, 24, 2):
            # node.o(i) is presumably the i-th consumer of this node's output
            # (graphsurgeon `Node.o`) -- verify; its second input holds the
            # factor matrix.
            factor256x256 = node.o(i).inputs[1].values
            # Node two hops downstream of that consumer; named as a Transpose
            # node -- TODO confirm its op type.
            tansposeNode = node.o(i).o().o()

            # Precompute table @ factor, then transpose and reshape it to
            # (1, 4, 64, 512).
            newTable = np.matmul(table512x256,
                                 factor256x256).transpose().reshape(
                                     1, 4, 64, 512)
            constantData = gs.Constant("wiliConstant-" + str(nSlice),
                                       np.ascontiguousarray(newTable))
            # New float32 variable that reuses the name of the downstream
            # node's first output; its last dimension is the symbolic 't4'.
            sliceV = gs.Variable(tansposeNode.outputs[0].name,
                                 np.dtype(np.float32), [1, 4, 64, 't4'])
            sliceN = gs.Node(
                "Slice",
                "wiliSliceN-" + str(nSlice),
                inputs=[
                    constantData,  # data
                    wiliConstant0,  # start=0
                    graph.inputs[0],  # end
                    wiliConstant3,  # axes=3
                    wiliConstant1,  # step=1
                ],
                outputs=[sliceV])
            graph.nodes.append(sliceN)
            graph.outputs.append(sliceV)
            nSlice += 1
            # Clear the downstream node's outputs; sliceV now carries that
            # tensor name.
            tansposeNode.outputs = []
Example #27
0

@gs.Graph.register()
def max(self, *args):
    """Add a "Max" layer over all positional arguments.

    The arguments are forwarded, as a tuple, as the layer inputs and the
    layer is asked for one output named "max_out".

    Returns:
        The first element of what ``self.layer`` returns.
    """
    layer_outputs = self.layer(op="Max", inputs=args, outputs=["max_out"])
    return layer_outputs[0]


@gs.Graph.register()
def identity(self, inp):
    """Add an "Identity" layer whose only input is ``inp``.

    The layer is asked for one output named "identity_out".

    Returns:
        The first element of what ``self.layer`` returns.
    """
    layer_outputs = self.layer(
        op="Identity",
        inputs=[inp],
        outputs=["identity_out"],
    )
    return layer_outputs[0]


# Build the graph
graph = gs.Graph()

input_tensor = gs.Variable("input", shape=(4, 4), dtype=np.float32)
graph.inputs = [input_tensor]

# Bounds for clipping values to [0, 6]
MIN_VAL = np.array(0, dtype=np.float32)
MAX_VAL = np.array(6, dtype=np.float32)

# Identity nodes are added to make the graph structure a bit more interesting
inp = graph.identity(input_tensor)
# min(inp, MAX_VAL) applies the upper bound; max(..., MIN_VAL) the lower bound
min_out = graph.min(inp, MAX_VAL)
max_out = graph.max(min_out, MIN_VAL)
graph.outputs = [graph.identity(max_out)]

# Graph outputs must include dtype information
graph.outputs[0].to_variable(dtype=np.float32, shape=(4, 4))
Example #28
0
# Note that the same function can be defined in different ways for different opsets.
# It will only be called if the Graph's opset matches one of the opsets for which the function is registered.
# Hence, for the opset 11 graph used in this example, the following function will never be used.
@gs.Graph.register(opsets=[1])
def relu(self, a):
    """Placeholder variant of ``relu`` that is registered for opset 1 only.

    Raises:
        NotImplementedError: always.
    """
    message = "This function has not been implemented!"
    raise NotImplementedError(message)


##########################################################################################################
# The functions registered above greatly simplify the process of building the graph itself.

graph = gs.Graph(opset=11)

# The graph built here computes:
#   output = ReLU((A * X^T) + B) (.) C + D
X = gs.Variable("X", shape=(64, 64), dtype=np.float32)
graph.inputs = [X]

# axt = (A * X^T)
# NumPy arrays (e.g. Tensor A) can be passed directly instead of Constants;
# they will automatically be converted to Constants.
A = np.ones((64, 64), dtype=np.float32)
axt = graph.gemm(A, X, trans_b=True)

# dense = ReLU(axt + B)
B = np.ones((64, 64), dtype=np.float32) * 0.5
add_out = graph.add(*axt, B)
dense = graph.relu(*add_out)

# output = dense (.) C + D
# If a Tensor instance is provided (e.g. Tensor C), it will not be modified at all.
# If you prefer to set the exact names of tensors in the graph, you should
Example #29
0
def replace_combinedNMS(graph,
                        top_k=1284,
                        keep_top_k=100,
                        num_classes=3,
                        plugin_version="1"):
    """
    Turn the CombinedNonMaxSuppression node of ``graph`` into a
    ``BatchedNMS_TRT`` plugin node, modifying the graph in place.

    Although, in principle, the values of top_k, keep_top_k and num_classes
    should be inferable from the graph, the ONNX parser currently does not
    expose them, so they have to be passed in.

    Args:
        graph: Graph exposing ``nodes``, ``outputs`` and ``tensors()``.
        top_k: Value of the plugin's ``topK`` attribute.
        keep_top_k: Value of the plugin's ``keepTopK`` attribute; also the
            second dimension of the new classes output.
        num_classes: Value of the plugin's ``numClasses`` attribute.
        plugin_version: Value of the plugin's ``plugin_version`` attribute.

    Returns:
        The same ``graph`` object.

    Raises:
        ValueError: If the graph has no CombinedNonMaxSuppression node with
            the expected name.
        KeyError: If one of the tensors looked up by name is missing, or the
            NMS node has no ``clip_boxes`` attribute.
    """
    nms_scope = "Postprocessor/CombinedNonMaxSuppression/combined_non_max_suppression/"
    nms_name = nms_scope + "CombinedNonMaxSuppression"

    # Locate the NMS node once and fail with a clear message when it is
    # absent, instead of hitting an unbound local further down.
    nms_nodes = [node for node in graph.nodes if node.name == nms_name]
    if not nms_nodes:
        raise ValueError(
            "Graph does not contain a node named '{}'".format(nms_name))
    clip_boxes = nms_nodes[-1].attrs["clip_boxes"]

    tensor_map = graph.tensors()
    model_input_tensor = tensor_map["image_tensor:0"]

    input_boxes = tensor_map["Postprocessor/ExpandDims_1:0"]
    input_scores = tensor_map["Postprocessor/Slice:0"]
    output_boxes = tensor_map["detection_boxes:0"]
    output_scores = tensor_map["detection_scores:0"]
    output_boxes.name = "detection_boxes"
    output_scores.name = "detection_scores"

    batch_size = model_input_tensor.shape[0]

    # The thresholds are stored as constant tensors next to the NMS node.
    iou_threshold = tensor_map[nms_scope + "iou_threshold:0"].values.item()
    score_threshold = tensor_map[nms_scope + "score_threshold:0"].values.item()

    output_classes = gs.Variable(name="detection_classes_nms:0",
                                 dtype=np.float32,
                                 shape=(batch_size, keep_top_k))
    output_num_detections = gs.Variable(name="num_detections_nms:0",
                                        dtype=np.int32,
                                        shape=(batch_size, 1))

    nms_attrs = {
        "shareLocation": True,
        "backgroundLabelId": -1,
        "numClasses": num_classes,
        "topK": top_k,
        "keepTopK": keep_top_k,
        "scoreThreshold": score_threshold,
        "iouThreshold": iou_threshold,
        "isNormalized": True,
        "clipBoxes": clip_boxes,
        "plugin_version": plugin_version
    }

    for node in nms_nodes:
        node.op = "BatchedNMS_TRT"
        node.attrs = nms_attrs
        node.inputs = [input_boxes, input_scores]
        node.outputs = [
            output_num_detections, output_boxes, output_scores,
            output_classes
        ]

    # Consumers of the old NMS outputs ":2" and ":3" are rewired to the new
    # plugin outputs. The position is searched per node and only matching
    # inputs are replaced, so an index found for one node is never reused for
    # another and a consumer without the old tensor is left untouched.
    rewiring = {
        "add": (nms_name + ":2", output_classes),
        "Postprocessor/Cast_4": (nms_name + ":3", output_num_detections),
    }
    for node in graph.nodes:
        if node.name not in rewiring:
            continue
        old_tensor_name, new_tensor = rewiring[node.name]
        matches = [
            i for i, input_tensor in enumerate(node.inputs)
            if input_tensor.name == old_tensor_name
        ]
        for i in matches:
            node.inputs[i] = new_tensor

    # Give the final tensors their public names. This relies on the last two
    # nodes of the graph producing the classes and num_detections outputs.
    graph.nodes[-2].outputs[0].name = "detection_classes"
    graph.nodes[-1].outputs[0].name = "num_detections"
    tensor_map['detection_classes:0'].name = "detection_classes"
    tensor_map["num_detections:0"].name = "num_detections"

    graph.outputs[0].name = "detection_boxes"
    graph.outputs[1].name = "detection_classes"
    graph.outputs[2].name = "detection_scores"
    graph.outputs[3].name = "num_detections"

    return graph
Example #30
0
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np
import onnx
import onnx_graphsurgeon as gs

tensor0 = gs.Variable(name="tensor0", dtype=np.float32,
                      shape=['B', 3, 64, 64])  # 三个真正有用的张量
tensor1 = gs.Variable(name="tensor1", dtype=np.float32, shape=['B', 3, 64, 64])
tensor2 = gs.Variable(name="tensor2", dtype=np.float32, shape=['B', 3, 64, 64])
tensor3 = gs.Variable(name="tensor3", dtype=np.float32, shape=['B', 3, 64,
                                                               64])  # 一个假输入张量
tensor4 = gs.Variable(name="tensor4", dtype=np.float32, shape=['B', 1, 64,
                                                               64])  # 一个假输出张量
tensor5 = gs.Variable(name="tensor5", dtype=np.float32, shape=['B', 1, 64,
                                                               64])  # 两个无用张量
tensor6 = gs.Variable(name="tensor6", dtype=np.float32, shape=['B', 1, 64, 64])
tensor7 = gs.Variable(name="tensor7", dtype=np.float32, shape=None)  # 中间结果张量
tensor8 = gs.Variable(name="tensor8", dtype=np.float32, shape=None)

constant0 = gs.Constant(name="w",
                        values=np.ones(shape=[1, 1, 1, 1], dtype=np.float32))