Example #1
    def add_model(cls, input_file="model.onnx", output_file="add.onnx"):
        """增加节点
        在Sigmoid 前增加 LeakyRelu 节点()
        """
        graph = gs.import_onnx(onnx.load(input_file))

        first_add = [node for node in graph.nodes
                     if node.op == "Sigmoid"][-1]  # find the last Sigmoid node
        # first_add.inputs = [inp for inp in first_add.inputs if inp.name == "fc"]  # keep only the input named "fc"
        # first_add.inputs = [inp for inp in first_add.inputs]  # keep all of its inputs
        # first_add.inputs = [inp for inp in first_add.inputs if inp.name != "b"]  # drop the input named "b"

        # Insert a new LeakyRelu node that takes over the Sigmoid's current inputs
        lrelu = gs.Variable('new_lrelu', dtype=np.float32)
        graph.nodes.append(
            gs.Node(op="LeakyRelu",
                    inputs=first_add.inputs,
                    outputs=[lrelu],
                    attrs={"alpha": 0.02}))

        # The Sigmoid's input now becomes the new LeakyRelu's output
        first_add.inputs.clear()
        first_add.inputs = [lrelu]

        # Remove unused nodes/tensors, and topologically sort the graph
        graph.cleanup().toposort()

        onnx.save(gs.export_onnx(graph), output_file)

    def onnx_change(onnx_path):
        '''This code was provided by my advisor; it fixes the problem of TRT
        inference outputs being all zeros. Thanks!!!
        '''
        node_configs = [(2682, 2684), (2775, 2777), (2961, 2963), (3333, 3335),
                        (4077, 4079)]
        if 'batch_2' in onnx_path:
            node_number = node_configs[1]
        elif 'batch_4' in onnx_path:
            node_number = node_configs[2]
        elif 'batch_8' in onnx_path:
            node_number = node_configs[3]
        elif 'batch_16' in onnx_path:
            node_number = node_configs[4]
        else:
            node_number = node_configs[0]

        graph = gs.import_onnx(onnx.load(onnx_path))
        for node in graph.nodes:
            if node.name == f"Gather_{node_number[0]}":
                print(node.inputs[1])
                node.inputs[1].values = np.int64(5)
                print(node.inputs[1])
            elif node.name == f"Gather_{node_number[1]}":
                print(node.inputs[1])
                node.inputs[1].values = np.int64(5)
                print(node.inputs[1])

        onnx.save(gs.export_onnx(graph), onnx_path)
        print(f"[INFO] onnx修改完成, 保存在{onnx_path}.")
Example #3
def modify_onnx(onnx_model_file):
    graph = gs.import_onnx(onnx.load(onnx_model_file))
    assert graph is not None

    for node in graph.nodes:
        if node.op == 'GridSampler':
            _, c, h, w = node.inputs[0].shape
            _, h_g, w_g, _ = node.inputs[1].shape
            align_corners = node.attrs['aligncorners']
            inter_mode = node.attrs['interpolationmode']
            pad_mode = node.attrs['paddingmode']
            m_type = 0 if node.inputs[0].dtype == np.float32 else 1
            buffer = np.array([c, h, w, h_g, w_g], dtype=np.int64).tobytes('C') \
              + np.array([inter_mode, pad_mode], dtype=np.int32).tobytes('C') \
              + np.array([align_corners], dtype=bool).tobytes('C') \
              + np.array([m_type], dtype=np.int32).tobytes('C')
            node.attrs = {
                'name': 'GridSampler',
                'version': '1',
                'namespace': "",
                'data': buffer
            }
            node.op = 'TRT_PluginV2'

    onnx.save(gs.export_onnx(graph), onnx_model_file)
def clamp_weights_onnx(onnx_input_fpath: str,
                       onnx_output_fpath: str,
                       min: float,
                       max: float,
                       ignore_nodes: List = None):
    """
    Clamps given onnx model to targeted upper and lower bounds.
    """

    graph = gs.import_onnx(onnx.load(onnx_input_fpath))
    ignore_nodes = set(ignore_nodes or [])

    for tensor in graph.tensors().values():
        if tensor.name in ignore_nodes or isinstance(tensor,
                                                     gs.ir.tensor.Variable):
            continue

        np.clip(tensor.values, min, max, out=tensor.values)

    for node in graph.nodes:
        if node.name in ignore_nodes:
            continue

        node_attr = node.attrs.get("value", None)
        if node_attr is not None:
            np.clip(node_attr.values, min, max, out=node_attr.values)

    model = gs.export_onnx(graph)
    onnx.save(model, onnx_output_fpath)
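
# Hypothetical usage of clamp_weights_onnx() above (the file names and the
# ignored tensor name "fc.weight" are assumptions): clamp every initializer
# into [-10, 10] except the ignored tensor.
clamp_weights_onnx("model.onnx", "model_clamped.onnx",
                   min=-10.0, max=10.0, ignore_nodes=["fc.weight"])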
Example #5
    def test_reduce_with_constant(self):
        # Should be no failure when models including Constant nodes use fallback
        # shape inference; Constant nodes will be lowered to constant tensors.
        with tempfile.TemporaryDirectory() as outdir:
            run_polygraphy_debug(
                [
                    "reduce",
                    ONNX_MODELS["reducable_with_const"].path,
                    "--no-shape-inference",
                    "--mode=linear",
                    "--output=reduced.onnx",
                ] + [
                    "--check",
                    TestReduce.FAKE_REDUCE_CHECKER,
                    "polygraphy_debug.onnx",
                    "--fail-node",
                    "onnx_graphsurgeon_node_3",
                ],
                disable_verbose=True,
                cwd=outdir,
            )
            model = onnx_from_path(os.path.join(outdir, "reduced.onnx"))
            graph = gs.import_onnx(model)
            assert len(graph.nodes) == 1
            assert graph.nodes[0].name == "onnx_graphsurgeon_node_3"
            # Outputs of Constant nodes should not become Variables; thus the model should have no inputs.
            assert not graph.inputs
Example #6
def post_process_packnet(model_file, opset=11):
    """
    Use ONNX graph surgeon to replace upsample and instance normalization nodes. Refer to post_processing.py for details.
    Args:
        model_file : Path to ONNX file
    """
    # Load the packnet graph
    graph = gs.import_onnx(onnx.load(model_file))

    if opset == 11:
        graph = process_pad_nodes(graph)

    # Replace the subgraph of upsample with a single node with input and scale factor.
    graph = process_upsample_nodes(graph, opset)

    # Convert the group normalization subgraph into a single plugin node.
    graph = process_groupnorm_nodes(graph)

    # Remove unused nodes, and topologically sort the graph.
    graph.cleanup().toposort()

    # Export the onnx graph from graphsurgeon
    onnx.save_model(gs.export_onnx(graph), model_file)

    print("Saved the ONNX model to {}".format(model_file))
Example #7
    def infer(self):
        """
        Sanitize the graph by cleaning any unconnected nodes, do a topological resort, and fold constant inputs values.
        When possible, run shape inference on the ONNX graph to determine tensor shapes.
        """
        for _ in range(3):
            count_before = len(self.graph.nodes)

            self.graph.cleanup().toposort()
            try:
                for node in self.graph.nodes:
                    for o in node.outputs:
                        o.shape = None
                model = gs.export_onnx(self.graph)
                model = shape_inference.infer_shapes(model)
                self.graph = gs.import_onnx(model)
            except Exception as e:
                log.info(
                    "Shape inference could not be performed at this time:\n{}".
                    format(e))
            try:
                self.graph.fold_constants(fold_shapes=True)
            except TypeError as e:
                log.error(
                    "This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your "
                    "onnx_graphsurgeon module. Error:\n{}".format(e))
                raise

            count_after = len(self.graph.nodes)
            if count_before == count_after:
                # No new folding occurred in this iteration, so we can stop for now.
                break
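
# A condensed, standalone sketch of the same sanitize loop as a free function
# (the helper name and the fixed pass count are my assumptions, mirroring the
# method above).
import onnx_graphsurgeon as gs
from onnx import shape_inference

def sanitize(graph, max_passes=3):
    for _ in range(max_passes):
        count_before = len(graph.nodes)
        graph.cleanup().toposort()
        # Round-trip through ONNX so shape inference can annotate the tensors.
        graph = gs.import_onnx(
            shape_inference.infer_shapes(gs.export_onnx(graph)))
        graph.fold_constants(fold_shapes=True)
        if len(graph.nodes) == count_before:
            break  # nothing was folded this pass: we reached a fixpoint
    return graph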
Example #8
def modify(input: str, output: str, downsample_ratio: float = 0.25) -> None:
    print(f'\nonnx load: {input}')
    graph = gs.import_onnx(onnx.load(input))

    _print_graph(graph)

    # update node Resize_3: scales
    resize_3 = [n for n in graph.nodes if n.name == 'Resize_3'][0]
    print()
    print(resize_3)

    scales = gs.Constant(
        '388',
        np.asarray([1, 1, downsample_ratio, downsample_ratio],
                   dtype=np.float32))

    resize_3.inputs = [
        i if i.name != '388' else scales for i in resize_3.inputs
    ]
    print()
    print(resize_3)

    # remove input downsample_ratio
    graph.inputs = [i for i in graph.inputs if i.name != 'downsample_ratio']

    # remove node Concat_2
    concat_2 = [n for n in graph.nodes if n.name == 'Concat_2'][0]
    concat_2.outputs.clear()

    # remove unused nodes/tensors
    graph.cleanup()

    onnx.save(gs.export_onnx(graph), output)
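
# Hypothetical invocation of modify() above (the file names are assumptions):
# bake a fixed 0.25 downsample ratio into Resize_3 and drop the now-unused
# downsample_ratio graph input.
modify("rvm_model.onnx", "rvm_model_fixed.onnx", downsample_ratio=0.25)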
Example #9
def modify_onnx(onnx_model_filepath="vot_opset_10.onnx",
                modified_onnx_model_filepath="vot_opset_10_modified.onnx"):
    """Modifies onnx model to fix issues with running on TRT.

  Args:
    onnx_model_filepath: Input onnx file path.
    modified_onnx_model_filepath: Output onnx file path.

  Raises:
    Exception: There are still uint8's that have not been converted.
  """
    orig_model = onnx.load(onnx_model_filepath)

    inferred_model = onnx.shape_inference.infer_shapes(orig_model)
    graph = gs.import_onnx(inferred_model)

    if count_uint8(graph=graph) > 0:
        print("Fixing UINT8 issues...")
        graph = fix_uint8_tensors(graph=graph)

        if count_uint8(graph=graph) > 0:
            raise Exception("UINT8 issue has not been fixed!")
        else:
            print("UINT8 issue has been fixed!")

    print("Replacing CombinedNMS to BatchedNMS...")
    graph = replace_combined_nms(graph=graph)

    onnx.save(gs.export_onnx(graph.cleanup()), modified_onnx_model_filepath)
    print("CombinedNMS has been replaced to BatchedNMS!")
    def __init__(
            self,
            checkpoint_path: str,
            *,
            version: str = "r6.0",
            input_sample: Optional[Tensor] = None,
            enable_dynamic: bool = False,
            device: torch.device = torch.device("cpu"),
            precision: str = "fp32",
    ):
        checkpoint_path = Path(checkpoint_path)
        assert checkpoint_path.exists()

        # Use YOLOTRTInference to convert saved model to an initial ONNX graph.
        model = YOLOTRTInference(checkpoint_path, version=version)
        model = model.eval()
        model = model.to(device=device)
        logger.info(f"Loaded saved model from {checkpoint_path}")

        onnx_model_path = checkpoint_path.with_suffix(".onnx")
        if input_sample is not None:
            input_sample = input_sample.to(device=device)
        model.to_onnx(onnx_model_path,
                      input_sample=input_sample,
                      enable_dynamic=enable_dynamic)
        self.graph = gs.import_onnx(onnx.load(onnx_model_path))
        assert self.graph
        logger.info("PyTorch2ONNX graph created successfully")

        # Fold constants via ONNX-GS that PyTorch2ONNX may have missed
        self.graph.fold_constants()
        self.num_classes = model.num_classes
        self.batch_size = 1
        self.precision = precision
Example #11
    def modeify_model2(cls, input_file="model.onnx", output_file="add.onnx"):
        """重新修改resize的实现
        """
        graph = gs.import_onnx(onnx.load(input_file))

        first_add = [node for node in graph.nodes
                     if node.op == "LeakyRelu"][0]  # find the first LeakyRelu node
        # first_add = [node for node in graph.nodes if node.name == "LeakyRelu_2"][0]  # find the node named LeakyRelu_2
        # first_add.inputs = [inp for inp in first_add.inputs]  # keep its inputs
        # first_add.outputs = [inp for inp in first_add.outputs]  # keep its outputs
        first_add.outputs.clear()  # required: clear() detaches the output links, so the LeakyRelu loses its output and a new one must be created below
        # graph.nodes.remove(first_add)  # remove the whole node

        second_add = [node for node in graph.nodes if node.op == "MaxPool"][0]
        # second_add = [node for node in graph.nodes if node.name == "MaxPool_32"][0]
        second_add.inputs.clear()  # required: clear() detaches the input links; a new input is assigned below

        # Redefine the LeakyRelu layer
        attrs = {"alpha": 0.1}
        lrelu = gs.Variable("new_lrelu", np.float32)
        node = gs.Node(op="LeakyRelu",
                       inputs=first_add.inputs,
                       outputs=[lrelu],
                       attrs=attrs)
        graph.nodes.append(node)

        # Redefine the Resize layer (implements the upsample)
        attrs = {
            "coordinate_transformation_mode": 'asymmetric',
            "mode": 'nearest',
            "nearest_mode": 'floor',
        }
        layer_name = "new_resize"  # 不要和原来 的resize节点名重复
        scales = np.array([1.0, 1.0, 2, 2]).astype(np.float32)
        scale_name = layer_name + ".scale"
        roi_name = layer_name + ".roi"
        scale = gs.Constant(scale_name, scales)
        roi = gs.Constant(roi_name, np.asarray([0, 0, 0, 0], np.float32))
        # inputs =first_add.outputs
        inputs = [lrelu]
        inputs.append(roi)
        inputs.append(scale)
        resize = gs.Variable(layer_name, dtype=np.float32)
        node = gs.Node(op="Resize",
                       inputs=inputs,
                       outputs=[resize],
                       attrs=attrs)
        graph.nodes.append(node)

        # Rewire the next layer's input
        second_add.inputs = [resize]

        # Remove unused nodes/tensors, and topologically sort the graph
        graph.cleanup().toposort()

        onnx.save(gs.export_onnx(graph), output_file)
Example #12
    def test_cast_elision(self):
        graph = gs.import_onnx(shape_cast_elision().load())
        new_graph = graph.fold_constants()
        no_casts = True

        for node in new_graph.nodes:
            no_casts &= node.op != "Cast"

        assert no_casts
Example #13
    def test_reduce_shape_inference(self, opts):
        with tempfile.TemporaryDirectory() as outdir:
            status = run_polygraphy_debug(["reduce", ONNX_MODELS["dynamic_identity"].path, "--output=reduced.onnx",
                                           "--show-output", "--model-input-shapes=X:[1,2,5,5]"] + opts
                                          + ["--check", "false"],
                                          disable_verbose=True, cwd=outdir)
            model = onnx_from_path(os.path.join(outdir, "reduced.onnx"))
            graph = gs.import_onnx(model)
            assert tuple(graph.inputs[0].shape) == (1, 2, 5, 5)
            assert tuple(graph.outputs[0].shape) == (1, 2, 5, 5)
Example #14
    def test_force_fallback_shape_inference_will_override_model_shapes(self):
        with tempfile.NamedTemporaryFile() as outmodel:
            run_polygraphy_surgeon(["extract", ONNX_MODELS["dynamic_identity"].path, "-o", outmodel.name, "--outputs", "Y:auto", "--force-fallback-shape-inference"])
            onnx_model_sanity_check(outmodel.name)
            graph = gs.import_onnx(onnx.load(outmodel.name))
            # Inputs should become fixed since fallback shape inference is being forced.
            for tensor in graph.tensors().values():
                assert tensor.shape is not None
            assert tuple(graph.inputs[0].shape) == (1, 2, 1, 1)
            assert tuple(graph.outputs[0].shape) == (1, 2, 1, 1)
Example #15
    def __init__(self, onnx_model_file, req_jsons, dynamic_batch):
        self.onnx_model_file = onnx_model_file
        self.onnx_model_fixed_file = self.onnx_model_file.split(
            '.onnx')[0] + '_tuned.onnx'
        self.onnx_model = onnx.load(onnx_model_file)
        self.onnx_model_fixed = None
        self.req_jsons = req_jsons
        self.req_json_dicts = []
        self.graph = gs.import_onnx(self.onnx_model)
        self.dynamic_batch = dynamic_batch
        self._validate_requests()
Example #16
    def test_extract_onnx_gs_graph(self, extract_model):
        model, input_meta, output_meta = extract_model
        graph = gs.import_onnx(model)
        graph = extract_subgraph(graph, input_meta, output_meta)
        assert isinstance(graph, gs.Graph)
        assert len(graph.nodes) == 1

        assert len(graph.inputs) == 1
        assert graph.inputs[0].name == "X"

        assert len(graph.outputs) == 1
        assert graph.outputs[0].name == "identity_out_0"
Example #17
    def check_model(self, model):
        graph = gs.import_onnx(model)
        assert len(graph.nodes) == 1

        assert len(graph.inputs) == 1
        assert graph.inputs[0].name == "X"
        assert graph.inputs[0].shape is not None
        assert graph.inputs[0].dtype is not None

        assert len(graph.outputs) == 1
        assert graph.outputs[0].name == "identity_out_0"
        assert graph.outputs[0].dtype is not None
def main():
    parser = argparse.ArgumentParser(description="Add batchedNMSPlugin")
    parser.add_argument("-f", "--model", help="Path to the ONNX model generated by export_model.py", default="yolov4_1_3_416_416.onnx")
    parser.add_argument("-t", "--topK", help="number of bounding boxes for nms", default=2000)
    parser.add_argument("-k", "--keepTopK", help="bounding boxes to be kept per image", default=1000)

    args, _ = parser.parse_known_args()

    graph = gs.import_onnx(onnx.load(args.model))
    
    graph = create_and_add_plugin_node(graph, int(args.topK), int(args.keepTopK))
    
    onnx.save(gs.export_onnx(graph), args.model + ".nms.onnx")
Example #19
def infer_model(path):
    model = onnx.load(path)
    graph = gs.import_onnx(model)

    feed_dict = {}
    for tensor in graph.inputs:
        feed_dict[tensor.name] = np.random.random_sample(size=tensor.shape).astype(tensor.dtype)

    output_names = [out.name for out in graph.outputs]

    sess = onnxruntime.InferenceSession(model.SerializeToString())
    outputs = sess.run(output_names, feed_dict)
    G_LOGGER.info("Inference outputs: {:}".format(outputs))
    return outputs
Example #20
    def test_no_load_constants(self):
        graph = gs.import_onnx(const_foldable().load())

        new_graph = graph.fold_constants()

        def check_no_const_loaded(graph):
            num_lazy_constants = 0
            for tensor in graph.tensors().values():
                if isinstance(tensor, Constant) and isinstance(
                        tensor._values, LazyValues):
                    num_lazy_constants += 1
            assert num_lazy_constants == 3  # Graph starts with 3 constants - none should be loaded.

        check_no_const_loaded(graph)
        check_no_const_loaded(new_graph)
Example #21
    def __init__(self, saved_model_path, legacy_plugins=False):
        """
        Constructor of the EfficientDet Graph Surgeon object, to do the conversion of an EfficientDet TF saved model
        to an ONNX-TensorRT parsable model.
        :param saved_model_path: The path pointing to the TensorFlow saved model to load.
        :param legacy_plugins: If using TensorRT version < 8.0.1, set this to True to use older (but slower) plugins.
        """
        saved_model_path = os.path.realpath(saved_model_path)
        assert os.path.exists(saved_model_path)

        # Use tf2onnx to convert saved model to an initial ONNX graph.
        graph_def, inputs, outputs = tf_loader.from_saved_model(
            saved_model_path, None, None, "serve", ["serving_default"])
        log.info("Loaded saved model from {}".format(saved_model_path))
        with tf.Graph().as_default() as tf_graph:
            tf.import_graph_def(graph_def, name="")
        with tf_loader.tf_session(graph=tf_graph):
            onnx_graph = tfonnx.process_tf_graph(tf_graph,
                                                 input_names=inputs,
                                                 output_names=outputs,
                                                 opset=11)
        onnx_model = optimizer.optimize_graph(onnx_graph).make_model(
            "Converted from {}".format(saved_model_path))
        self.graph = gs.import_onnx(onnx_model)
        assert self.graph
        log.info("TF2ONNX graph created successfully")

        # Fold constants via ONNX-GS that TF2ONNX may have missed
        self.graph.fold_constants()

        # Try to auto-detect by finding if nodes match a specific name pattern expected for either of the APIs.
        self.api = None
        if any("class_net/" in node.name for node in self.graph.nodes):
            self.api = "AutoML"
        elif any("/WeightSharedConvolutionalClassHead/" in node.name
                 for node in self.graph.nodes):
            self.api = "TFOD"
        assert self.api
        log.info("Graph was detected as {}".format(self.api))

        self.batch_size = None
        self.legacy_plugins = legacy_plugins
Example #22
    def resize_model(cls,
                     input_file="model.onnx",
                     output_file="subgraph.onnx"):
        """修改模型的输入与输出(截断输入输出)"""
        model = onnx.load(input_file)
        graph = gs.import_onnx(model)
        # tensors = graph.tensors()
        # Reset the model's inputs and outputs:
        # graph.inputs = [tensors['x'].to_variable(np.float32)]
        # graph.outputs = [tensors['sigmoid'].to_variable(np.float32, shape=(1, 8))]  # the original output node was named "softmax"
        first_add = [node for node in graph.nodes if node.op == "Sigmoid"][-1]
        graph.outputs = [
            first_add.outputs[0].to_variable(np.float32, shape=(1, 8))
        ]

        graph.cleanup()
        onnx.save(gs.export_onnx(graph), output_file)
def main():
    parser = argparse.ArgumentParser(
        description="Insert DCNv2 plugin node into ONNX model")
    parser.add_argument(
        "-i",
        "--input",
        help="Path to ONNX model with 'Plugin' node to replace with DCNv2_TRT",
        default="models/centertrack_DCNv2_named.onnx")
    parser.add_argument("-o",
                        "--output",
                        help="Path to output ONNX model with 'DCNv2_TRT' node",
                        default="models/modified.onnx")

    args, _ = parser.parse_known_args()
    graph = gs.import_onnx(onnx.load(args.input))
    graph = process_graph(graph)
    onnx.save(gs.export_onnx(graph), args.output)
Example #24
def main():
    org_model_file_path = getFilePath('samples/python/engine_refit_onnx_bidaf/bidaf-original.onnx')

    print("Modifying the ONNX model ...")
    original_model = onnx.load(org_model_file_path)
    graph = gs.import_onnx(original_model)

    drop_category_mapper_nodes(graph)
    replace_unsupported_ops(graph)
    save_weights_for_refitting(graph)

    new_model = gs.export_onnx(graph)

    modified_model_name = "bidaf-modified.onnx"
    onnx.checker.check_model(new_model)
    onnx.save(new_model, modified_model_name)
    print("Modified ONNX model saved as {}".format(modified_model_name))
    print("Done.")
Example #25
    def remove_model(cls, input_file="model.onnx", output_file="removed.onnx"):
        """删除某个节点
        删除sigmoid节点
        """
        graph = gs.import_onnx(onnx.load(input_file))
        first_add = [node for node in graph.nodes if node.op == "Sigmoid"][-1]
        # first_add.inputs = [inp for inp in first_add.inputs if inp.name == "fc"]  # 找到其对应的输入
        # first_add.inputs = [inp for inp in first_add.inputs]  # 找到其对应的输入
        first_add.outputs.clear()

        # Find the next node and rewire its input
        next_add = [node for node in graph.nodes
                    if node.op == "Softmax"][-1]  # find the last Softmax node
        next_add.inputs.clear()  # clear first, then reassign
        next_add.inputs = first_add.inputs  # rewire to the Sigmoid's former inputs

        # Remove the now-disconnected node from the graph completely
        graph.cleanup().toposort()
        onnx.save(gs.export_onnx(graph), output_file)
Example #26
def modify_onnx(onnx_model_filepath="vot_opset_10.onnx",
                modified_onnx_model_filepath="vot_opset_10_modified.onnx"):
    orig_model = onnx.load(onnx_model_filepath)

    inferred_model = onnx.shape_inference.infer_shapes(orig_model)
    graph = gs.import_onnx(inferred_model)

    if count_uint8(graph=graph) > 0:
        print("Fixing UINT8 issues...")
        graph = fix_uint8_tensors(graph=graph)

        if count_uint8(graph=graph) > 0:
            raise Exception("UINT8 issue has not been fixed!")
        else:
            print("UINT8 issue has been fixed!")

    print("Replacing CombinedNMS to BatchedNMS...")
    graph = replace_combinedNMS(graph=graph)

    onnx.save(gs.export_onnx(graph.cleanup()), modified_onnx_model_filepath)
    print("CombinedNMS has been replaced to BatchedNMS!")
node7 = gs.Node("Conv",
                "Conv1",
                inputs=[tensorLoop, constant1x32],
                outputs=[tensor7])
graphNodeList.append(node7)

graph = gs.Graph(nodes=graphNodeList,
                 inputs=[tensor0],
                 outputs=[tensor7],
                 opset=13)

onnx.save(gs.export_onnx(graph.cleanup().toposort()), onnxFile0)
print("Succeeded building %s!" % (onnxFile0))

# Modify the .onnx
graph = gs.import_onnx(onnx.load(onnxFile0))

constant32r = gs.Constant(
    "constant32r",
    np.ascontiguousarray(
        np.random.rand(1, nC, 1, 1).reshape(1, nC, 1, 1).astype(np.float32) *
        2 - 1))

for node in graph.nodes:
    if node.op in ['Unsqueeze', 'Squeeze']:
        node.o().inputs[0] = node.inputs[0]

    if node.op == 'Transpose':
        if node.o().op == 'Add':
            node.o().inputs[1] = constant32r
        node.o().inputs[0] = node.inputs[0]
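
# A minimal sketch (not from the original source) of the node-bypass pattern
# used in the loop above: reconnect the consumer's input directly to the
# bypassed node's own input, then clear the outputs so cleanup() drops the
# node. Like the loop above, it assumes the consumer reads the node at input 0.
def bypass_node(graph, node):
    node.o().inputs[0] = node.inputs[0]  # consumer now reads the node's input
    node.outputs.clear()  # detach; graph.cleanup() will remove the node
    return graph.cleanup()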
Example #28
        conv_weights_transposed = np.transpose(conv_weights_tensor.values,
                                               [3, 2, 0, 1])
        conv_weights_tensor.values = conv_weights_transposed

        # Remove the transpose nodes after the dequant node. TensorRT does not support transpose nodes after QDQ nodes.
        dequant_node_output = node.i(1).i(0).outputs[0]
        node.inputs[1] = dequant_node_output

    # Remove unused nodes, and topologically sort the graph.
    return graph.cleanup().toposort()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        "Post process ONNX graph by removing transpose nodes")
    parser.add_argument("--input", required=True, help="Input onnx graph")
    parser.add_argument("--output",
                        default='postprocessed_rn50.onnx',
                        help="Name of post processed onnx graph")
    args = parser.parse_args()

    # Load the rn50 graph
    graph = gs.import_onnx(onnx.load(args.input))

    # Remove the transpose nodes and reshape the convolution weights
    graph = process_transpose_nodes(graph)

    # Export the onnx graph from graphsurgeon
    onnx_model = gs.export_onnx(graph)
    print("Output ONNX graph generated: ", args.output)
    onnx.save_model(onnx_model, args.output)
def main():
    tf.set_random_seed(1234)
    np.random.seed(0)
    iterations = 100
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        batch_size = 16
        input_data = np.random.rand(batch_size, 256).astype(np.float32)
        input_ph = tf.placeholder(dtype=tf.float32,
                                  shape=[batch_size, 256],
                                  name="input")

        x = tf.layers.dense(input_ph, 256)

        # test one_hot
        depth = 256
        indices = tf.cast(tf.clip_by_value(tf.reshape(x, [-1]), 0, depth - 1),
                          tf.int32)
        x = tf.one_hot(indices, depth)
        x = tf.reshape(x, [batch_size, -1])
        x = tf.layers.dense(x, 256)

        output = tf.identity(x, name="output")
        sess.run(tf.global_variables_initializer())

        time_sum = 0
        a = datetime.now()
        for i in range(iterations):
            tf_result = sess.run([output], {input_ph: input_data})
        b = datetime.now()
        time_sum = (b - a).total_seconds()
        tf_time = "[INFO] TF  execution time " + str(
            time_sum * 1000 / iterations) + " ms"
        print(tf_time)

        output_name_without_port = ["output"]
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_name_without_port)
        # save frozen model
        with open("test_op.pb", "wb") as ofile:
            ofile.write(frozen_graph.SerializeToString())

    model_file = "test_op.onnx"
    os.system(
        "python3 -m tf2onnx.convert --input test_op.pb --inputs input:0 --outputs output:0 --output test_op.onnx --verbose --opset 11"
    )

    ### use ONNX GraphSurgeon
    # The ONNX operator must stay aligned with the TensorRT plugin (name, inputs,
    # outputs and attributes) so that the plugin fallback mechanism can match it.
    # ONNX GraphSurgeon is useful for such modifications; install it with:
    # pip install nvidia-pyindex
    # pip install onnx-graphsurgeon
    graph = gs.import_onnx(onnx.load(model_file))
    graph = modify_onehot(graph)
    model_file = "test_op_onehot.onnx"
    onnx.save(gs.export_onnx(graph), model_file)

    # build trt model by onnx model
    cuda.Device(0).make_context()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    ) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_batch_size = batch_size

        with open(model_file, 'rb') as model:
            # parse onnx model
            parser.parse(model.read())
            for i in range(parser.num_errors):
                print(parser.get_error(i))

        engine = builder.build_engine(network, builder.create_builder_config())
        if engine is None:
            print("[ERROR] engine is None")
            exit(-1)
        inputs, outputs, bindings, stream = allocate_buffers(engine)
        with engine.create_execution_context() as context:
            input_data = input_data.ravel()
            np.copyto(inputs[0].host, input_data)

            time_sum = 0
            a = datetime.now()
            for i in range(iterations):
                np.copyto(inputs[0].host, input_data)
                output = do_inference(context,
                                      bindings=bindings,
                                      inputs=inputs,
                                      outputs=outputs,
                                      stream=stream,
                                      batch_size=batch_size)
            b = datetime.now()
            time_sum = (b - a).total_seconds()
            trt_time = ("TRT execution time " +
                        str(time_sum * 1000 / iterations) + " ms")
            trt_result = output

    for i in range(len(trt_result)):
        print(
            "trt cross_check output_%d " % i +
            str(np.allclose(tf_result[i].flatten(), trt_result[i], atol=1e-5)))
        print("max diff " +
              str(np.fabs(tf_result[i].flatten() - trt_result[i]).max()))
        print("min diff " +
              str(np.fabs(tf_result[i].flatten() - trt_result[i]).min()))

    print(tf_time)
    print(trt_time)

    cuda.Context.pop()
output_names = ['y']
torch.onnx.export(
    custom, (x, grid),
    src_onnx,
    input_names=input_names,
    output_names=output_names,
    opset_version=11,
    verbose=True,
    operator_export_type=torch.onnx.OperatorExportTypes.ONNX_FALLTHROUGH,
    do_constant_folding=False)

import onnx_graphsurgeon as gs
import onnx
import numpy as np

graph = gs.import_onnx(onnx.load(src_onnx))

for node in graph.nodes:
    if node.op == 'Resize':
        # actually not used in this sample
        node_concat = node.i(2, 0)
        node_concat.i(0, 0).attrs['value'] = gs.Constant(
            '',
            np.concatenate((node_concat.i(0, 0).attrs['value'].values,
                            node_concat.i(1, 0).attrs['value'].values)))
        node.inputs[2] = node_concat.inputs[0]
        node_concat.outputs.clear()

    if node.op == 'Clip':
        node_cast0 = node.i(1, 0)
        node_cast1 = node.i(2, 0)