def optimize_fp16_onnx_with_cast(input_onnx_path, optimized_onnx_path,
                                 epsilon):
    """Replace a model's nodes with a Cast-wrapped LayerNormalization.

    Loads the model at ``input_onnx_path``, removes the nodes returned by
    ``OnnxModel.nodes()``, and adds five nodes: Casts (``to=1``) of the
    tensors ``"input"``, ``"layer_norm.weight"`` and ``"layer_norm.bias"``,
    a ``LayerNormalization`` over the casted tensors, and a Cast
    (``to=10``) of its result into ``"output"``. The result is written to
    ``optimized_onnx_path``.

    Args:
        input_onnx_path: Path of the ONNX model to load.
        optimized_onnx_path: Path the rewritten model is saved to.
        epsilon: Value for the ``epsilon`` attribute of the
            ``LayerNormalization`` node.
    """
    # ONNX TensorProto.DataType codes used by the Cast nodes.
    to_fp32 = 1   # FLOAT
    to_fp16 = 10  # FLOAT16

    def cast(source, target, node_name, dtype):
        # Build one Cast node converting tensor `source` into `target`.
        return onnx.helper.make_node("Cast", [source], [target],
                                     node_name, to=dtype)

    model = OnnxModel(onnx.load(input_onnx_path))

    # Taken before anything is added so only pre-existing nodes are removed.
    stale_nodes = model.nodes()

    layer_norm = onnx.helper.make_node(
        "LayerNormalization",
        inputs=["fp32_input", "fp32_layer_norm.weight",
                "fp32_layer_norm.bias"],
        outputs=["fp32_output"],
        name="layer_norm",
        epsilon=epsilon,  # use fp32 epsilon
    )
    replacement_nodes = [
        cast("input", "fp32_input", "cast_input", to_fp32),
        cast("layer_norm.weight", "fp32_layer_norm.weight", "cast_weight",
             to_fp32),
        cast("layer_norm.bias", "fp32_layer_norm.bias", "cast_bias",
             to_fp32),
        layer_norm,
        cast("fp32_output", "output", "cast_output", to_fp16),
    ]

    model.remove_nodes(stale_nodes)
    model.add_nodes(replacement_nodes)
    # NOTE(review): presumably drops whatever the new graph no longer uses;
    # confirm against OnnxModel.prune_graph.
    model.prune_graph()
    model.save_model_to_file(optimized_onnx_path)
def optimize_fp16_onnx_no_cast(input_onnx_path, optimized_onnx_path, epsilon):
    """Replace a model's nodes with a single LayerNormalization node.

    Loads the model at ``input_onnx_path``, removes the nodes returned by
    ``OnnxModel.nodes()``, and adds one ``LayerNormalization`` node that
    reads the tensors ``"input"``, ``"layer_norm.weight"`` and
    ``"layer_norm.bias"`` and writes ``"output"``. No Cast nodes are
    inserted. The result is written to ``optimized_onnx_path``.

    Args:
        input_onnx_path: Path of the ONNX model to load.
        optimized_onnx_path: Path the rewritten model is saved to.
        epsilon: Value for the ``epsilon`` attribute of the
            ``LayerNormalization`` node.
    """
    # NOTE(review): a later definition in this file reuses this function
    # name; if both live in one module, the later one wins.
    model = OnnxModel(onnx.load(input_onnx_path))

    # Taken before the new node is added so only old nodes are removed.
    stale_nodes = model.nodes()

    layer_norm = onnx.helper.make_node(
        "LayerNormalization",
        inputs=["input", "layer_norm.weight", "layer_norm.bias"],
        outputs=["output"],
        name="layer_norm",
        epsilon=epsilon,
    )

    model.remove_nodes(stale_nodes)
    model.add_node(layer_norm)
    # NOTE(review): presumably drops whatever the new graph no longer uses;
    # confirm against OnnxModel.prune_graph.
    model.prune_graph()
    model.save_model_to_file(optimized_onnx_path)
def optimize_fp16_onnx_no_cast(input_onnx_path, optimized_onnx_path, epsilon):
    """Replace a model's nodes with a single LayerNormalization node.

    Loads the model at ``input_onnx_path``, looks up the weight and bias
    tensor names with ``get_weight`` / ``get_bias``, removes every node
    except those whose first output is one of these two tensors, and adds
    one ``LayerNormalization`` node that reads ``"input"`` plus the weight
    and bias tensors and writes ``"output"``. The result is written to
    ``optimized_onnx_path``.

    Args:
        input_onnx_path: Path of the ONNX model to load.
        optimized_onnx_path: Path the rewritten model is saved to.
        epsilon: Value for the ``epsilon`` attribute of the
            ``LayerNormalization`` node.
    """
    m = onnx.load(input_onnx_path)
    onnx_model = OnnxModel(m)

    weight_name = get_weight(onnx_model)
    bias_name = get_bias(onnx_model)

    # Keep any node that produces the weight or bias tensor: the new
    # LayerNormalization node consumes those tensors by name. The removal
    # list must not be reset to every node in the graph afterwards, or the
    # producers would be removed and the new node's inputs left dangling.
    nodes_to_remove = [
        n for n in onnx_model.nodes()
        if n.output[0] not in (weight_name, bias_name)
    ]

    node_to_add = onnx.helper.make_node(
        "LayerNormalization",
        ["input", weight_name, bias_name],
        ["output"],
        "layer_norm",
        epsilon=epsilon,
    )

    onnx_model.remove_nodes(nodes_to_remove)
    onnx_model.add_node(node_to_add)
    # NOTE(review): presumably drops whatever the new graph no longer uses;
    # confirm against OnnxModel.prune_graph.
    onnx_model.prune_graph()
    onnx_model.save_model_to_file(optimized_onnx_path)
# Example #4
# 0
def optimize_fp16_onnx_with_cast(input_onnx_path, optimized_onnx_path,
                                 epsilon):
    """Replace a model's nodes with a Cast-wrapped LayerNormalization.

    Loads the model at ``input_onnx_path``, looks up the weight and bias
    tensor names with ``get_weight`` / ``get_bias``, and removes every node
    except those whose first output is one of these two tensors. It then
    adds five nodes: Casts (``to=1``) of ``"input"``, the weight and the
    bias, a ``LayerNormalization`` over the casted tensors, and a Cast
    (``to=10``) of its result into ``"output"``. The result is written to
    ``optimized_onnx_path``.

    Args:
        input_onnx_path: Path of the ONNX model to load.
        optimized_onnx_path: Path the rewritten model is saved to.
        epsilon: Value for the ``epsilon`` attribute of the
            ``LayerNormalization`` node.
    """
    # ONNX TensorProto.DataType codes used by the Cast nodes.
    to_fp32 = 1   # FLOAT
    to_fp16 = 10  # FLOAT16

    def cast(source, target, node_name, dtype):
        # Build one Cast node converting tensor `source` into `target`.
        return onnx.helper.make_node("Cast", [source], [target],
                                     node_name, to=dtype)

    model = OnnxModel(onnx.load(input_onnx_path))
    weight_name = get_weight(model)
    bias_name = get_bias(model)

    # Nodes whose first output is the weight or bias tensor are kept,
    # because the new Cast nodes read those tensors by name.
    preserved_outputs = (weight_name, bias_name)
    stale_nodes = []
    for node in model.nodes():
        if node.output[0] not in preserved_outputs:
            stale_nodes.append(node)

    layer_norm = onnx.helper.make_node(
        "LayerNormalization",
        inputs=["fp32_input", "fp32_layer_norm.weight",
                "fp32_layer_norm.bias"],
        outputs=["fp32_output"],
        name="layer_norm",
        epsilon=epsilon,  # use fp32 epsilon
    )
    replacement_nodes = [
        cast("input", "fp32_input", "cast_input", to_fp32),
        cast(weight_name, "fp32_layer_norm.weight", "cast_weight", to_fp32),
        cast(bias_name, "fp32_layer_norm.bias", "cast_bias", to_fp32),
        layer_norm,
        cast("fp32_output", "output", "cast_output", to_fp16),
    ]

    model.remove_nodes(stale_nodes)
    model.add_nodes(replacement_nodes)
    # NOTE(review): presumably drops whatever the new graph no longer uses;
    # confirm against OnnxModel.prune_graph.
    model.prune_graph()
    model.save_model_to_file(optimized_onnx_path)