Code Example #1
    def test_quantize_concat(self):
        np.random.seed(1)

        model_fp32_path = 'concat_fp32.onnx'
        model_uint8_path = 'concat_uint8.onnx'
        model_uint8_qdq_path = 'concat_uint8_qdq.onnx'

        self.construct_model(model_fp32_path)

        # Verify QOperator mode
        data_reader = InputFeedsNegOneZeroOne(1, {'input': [1, 3, 15, 15]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)

        qnode_counts = {'QLinearConv': 3, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'QLinearConcat': 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
        qdqnode_counts = {'Conv': 3, 'QuantizeLinear': 5, 'DequantizeLinear': 8, 'Concat': 1}
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
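These snippets are excerpts from larger test files, so their imports are omitted. A representative import block covering most of the examples below might look like this (a sketch: the check_op_type_count / check_model_correctness helpers and the various data-reader classes are test-local utilities, not part of the public API):

import numpy as np
import onnx
from onnx import TensorProto, helper, numpy_helper
from onnxruntime.quantization import (
    CalibrationDataReader,
    QuantFormat,
    QuantType,
    quantize_dynamic,
    quantize_static,
)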
Code Example #2
    def run_quantize_squeezes_of_opset(self, opset=13):
        np.random.seed(1)

        model_fp32_path = 'squeezes_opset{}_fp32.onnx'.format(opset)
        model_uint8_path = 'squeezes_opset{}_uint8.onnx'.format(opset)
        model_uint8_qdq_path = 'squeezes_opset{}_uint8_qdq.onnx'.format(opset)

        self.construct_model_conv_squeezes(model_fp32_path, [1, 2, 26, 42], [3, 2, 3, 3], [1, 3, 24, 40], opset=opset)

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)

        # make sure the Squeeze ops operate on the quantized (int8) tensors; their input names reveal this
        qnode_counts = {'QuantizeLinear': 1, 'DequantizeLinear': 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next(), rtol=0.01, atol=0.5)

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
        qdqnode_counts = {'Conv': 3, 'QuantizeLinear': 8, 'DequantizeLinear': 11}
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next(), rtol=0.01, atol=0.5)
Code Example #3
    def perform_quantization(self, activations, weight, act_sym, wgt_sym):

        # One-layer convolution model
        act = helper.make_tensor_value_info("ACT", TensorProto.FLOAT,
                                            activations[0].shape)
        wgt = helper.make_tensor_value_info("WGT", TensorProto.FLOAT,
                                            weight.shape)
        res = helper.make_tensor_value_info("RES", TensorProto.FLOAT,
                                            [None, None, None, None])
        wgt_init = numpy_helper.from_array(weight, "WGT")
        conv_node = onnx.helper.make_node("Conv", ["ACT", "WGT"], ["RES"])
        graph = helper.make_graph([conv_node],
                                  "test", [act], [res],
                                  initializer=[wgt_init])
        model = helper.make_model(graph,
                                  opset_imports=[helper.make_opsetid("", 11)])
        onnx.save(model, "model.onnx")

        # Quantize model
        class DummyDataReader(quantization.CalibrationDataReader):
            def __init__(self):
                self.iterator = ({"ACT": act} for act in activations)

            def get_next(self):
                return next(self.iterator, None)

        quantization.quantize_static(
            model_input="model.onnx",
            model_output="quantized-model.onnx",
            calibration_data_reader=DummyDataReader(),
            quant_format=quantization.QuantFormat.QOperator,
            activation_type=quantization.QuantType.QInt8,
            weight_type=quantization.QuantType.QInt8,
            op_types_to_quantize=["Conv", "MatMul"],
            extra_options={
                "WeightSymmetric": wgt_sym,
                "ActivationSymmetric": act_sym
            })

        # Extract quantization parameters: scales and zero points for activations, weights, and results
        model = onnx.load("quantized-model.onnx")
        act_zp = [
            init for init in model.graph.initializer
            if init.name == "ACT_zero_point"
        ][0].int32_data[0]
        act_sc = [
            init for init in model.graph.initializer
            if init.name == "ACT_scale"
        ][0].float_data[0]
        wgt_zp = [
            init for init in model.graph.initializer
            if init.name == "WGT_zero_point"
        ][0].int32_data[0]
        wgt_sc = [
            init for init in model.graph.initializer
            if init.name == "WGT_scale"
        ][0].float_data[0]

        # Return quantization parameters
        return act_zp, act_sc, wgt_zp, wgt_sc
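A hedged usage sketch for the helper above: with ActivationSymmetric and WeightSymmetric enabled, signed int8 quantization pins both zero points to 0, so a caller could sanity-check the returned parameters roughly as follows (the data shapes are illustrative assumptions):

# Inside the same test class; shapes are made up for illustration.
activations = [np.random.randn(1, 4, 8, 8).astype(np.float32) for _ in range(4)]
weight = np.random.randn(2, 4, 3, 3).astype(np.float32)
act_zp, act_sc, wgt_zp, wgt_sc = self.perform_quantization(
    activations, weight, act_sym=True, wgt_sym=True)
assert act_zp == 0 and wgt_zp == 0  # symmetric int8 centers on zero
assert act_sc > 0 and wgt_sc > 0    # scales are strictly positive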
Code Example #4
def main():
    input_model_path = './resnet50_v1.onnx'
    output_model_path = './calibrated_quantized_model.onnx'
    calibration_dataset_path = './test_images'
    dr = ResNet50DataReader(calibration_dataset_path)
    quantize_static(input_model_path, output_model_path, dr)
    print('Calibrated and quantized model saved.')
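ResNet50DataReader is defined elsewhere in that example's project. A calibration reader only has to implement get_next and return None once the data is exhausted; the sketch below makes that concrete (the folder layout and the plain [0, 1] scaling are assumptions, a real ImageNet reader would normalize per channel):

import os
import numpy as np
from PIL import Image
from onnxruntime.quantization import CalibrationDataReader

class FolderImageDataReader(CalibrationDataReader):
    """Feeds each image in a folder once, as an NCHW float32 batch of one."""

    def __init__(self, image_folder, input_name='data', size=(224, 224)):
        self._input_name = input_name
        self._size = size
        self._paths = sorted(os.path.join(image_folder, f) for f in os.listdir(image_folder))
        self._iter = None

    def _load(self, path):
        img = np.asarray(Image.open(path).convert('RGB').resize(self._size), dtype=np.float32)
        return (img / 255.0).transpose(2, 0, 1)[None]  # HWC -> NCHW, add batch dim

    def get_next(self):
        if self._iter is None:
            self._iter = iter(self._paths)
        path = next(self._iter, None)
        return None if path is None else {self._input_name: self._load(path)}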
Code Example #5
    def test_quantize_resize(self):
        np.random.seed(1)

        model_fp32_path = 'resize_fp32.onnx'
        model_uint8_path = 'resize_uint8.onnx'
        model_uint8_qdq_path = 'resize_uint8_qdq.onnx'

        kwargs = {'coordinate_transformation_mode': 'asymmetric', 'mode': 'nearest', 'nearest_mode': 'floor'}
        self.construct_model_conv_resize(model_fp32_path,
                                         [1, 2, 26, 42], [3, 2, 3, 3],
                                         [1, 3, 24, 40], [1, 3, 48, 80],
                                         kwargs,
                                         [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 2.0, 2.0], None)

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)

        # make sure Resize consumes the quantized (int8) tensor; its input name reveals this
        check_op_nodes(self, model_uint8_path, lambda node: (node.name != "resize_node" or node.input[0] != 'conv_output'))
        qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'Resize': 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
        qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 2, 'DequantizeLinear': 3, 'Resize': 1}
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
Code Example #6
    def test_quantize_avgpool(self):
        np.random.seed(1)

        model_fp32_path = 'avgpool_fp32.onnx'
        model_uint8_path = 'avgpool_uint8.onnx'
        model_uint8_qdq_path = 'avgpool_uint8_qdq.onnx'

        self.construct_model_conv_avgpool(model_fp32_path,
                                          [1, 2, 26, 42], [3, 2, 3, 3],
                                          [1, 3, 24, 40], {'kernel_shape': [3, 3]},
                                          [1, 3, 22, 38])

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)
        qnode_counts = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 2, 'QLinearAveragePool': 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
        qdqnode_counts = {'Conv': 1, 'QuantizeLinear': 3, 'DequantizeLinear': 4, 'AveragePool': 1}
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
Code Example #7
    def test_quantize_transpose(self):
        np.random.seed(1)
        model_fp32_path = 'transpose_fp32.onnx'
        model_uint8_path = 'transpose_uint8.onnx'
        model_uint8_qdq_path = 'transpose_uint8_qdq.onnx'

        self.construct_model_matmul_transpose(model_fp32_path, [3, 7], [7, 5], [5, 3])

        # Verify QOperator model
        data_reader = self.input_feeds(1, {'input': [3, 7]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)
        # make sure Transpose consumes the quantized (int8) tensor; its input name reveals this
        check_op_nodes(self, model_uint8_path, lambda node: (node.name != "transpose_node" or node.input[0] != 'matmul_output'))
        qnode_counts = {'QLinearMatMul': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1, 'Transpose': 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # Verify QDQ model
        data_reader.rewind()
        quantize_static(model_fp32_path, model_uint8_qdq_path, data_reader, quant_format=QuantFormat.QDQ)
        qdqnode_counts = {'MatMul': 1, 'QuantizeLinear': 2, 'DequantizeLinear': 3, 'Transpose': 1}
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
Code Example #8
    def static_quant_test_qdq(self,
                              model_fp32_path,
                              data_reader,
                              activation_type,
                              weight_type,
                              extra_options={}):
        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = 'u8' if (activation_type
                                       == QuantType.QUInt8) else 's8'
        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
        model_int8_path = 'gemm_fp32.quant_dqd_{}{}.onnx'.format(
            activation_type_str, weight_type_str)

        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_int8_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        quant_nodes = {'Gemm': 2, 'QuantizeLinear': 3, 'DequantizeLinear': 7}
        check_op_type_count(self, model_int8_path, **quant_nodes)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_int8_path,
                                data_reader.get_next())
Code Example #9
File: test_qdq.py  Project: xkszltl/onnxruntime
    def verify(self, per_channel):
        np.random.seed(1)
        model_fp32_path = 'conv_clip_fp32.{}.onnx'.format(per_channel)
        model_int8_qdq_path = 'conv_clip_quant_qdq.{}.onnx'.format(per_channel)
        model_int8_qop_path = 'conv_clip_quant_qop.{}.onnx'.format(per_channel)
        data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
        self.construct_model_conv_clip(model_fp32_path,
                                       [1, 8, 33, 33],
                                       [16, 8, 3, 3],
                                       [15376])
        quantize_static(model_fp32_path,
                        model_int8_qdq_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        per_channel=per_channel,
                        reduce_range=per_channel
                        )
        data_reader.rewind()
        # topo sort check
        check_op_type_order(self, model_int8_qdq_path, ['DequantizeLinear', 'QuantizeLinear', 'DequantizeLinear', 'Conv', 'QuantizeLinear', 'DequantizeLinear', 'Reshape', 'QuantizeLinear', 'DequantizeLinear'])
        check_model_correctness(self, model_fp32_path, model_int8_qdq_path, data_reader.get_next())

        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_int8_qop_path,
                        data_reader,
                        quant_format=QuantFormat.QOperator,
                        per_channel=per_channel,
                        reduce_range=per_channel
                        )
        data_reader.rewind()
        qop_nodes = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1}
        check_op_type_count(self, model_int8_qop_path, **qop_nodes)
        check_model_correctness(self, model_fp32_path, model_int8_qop_path, data_reader.get_next())
Code Example #10
 def quantize_model(
     self,
     model_fp32_path,
     model_i8_path,
     data_reader=None,
     activation_type=QuantType.QUInt8,
     weight_type=QuantType.QUInt8,
     extra_options={},
 ):
     if data_reader is not None:
         quantize_static(
             model_fp32_path,
             model_i8_path,
             data_reader,
             reduce_range=True,
             quant_format=QuantFormat.QOperator,
             activation_type=activation_type,
             weight_type=weight_type,
             extra_options=extra_options,
         )
     else:
         quantize_dynamic(
             model_fp32_path,
             model_i8_path,
             reduce_range=True,
             weight_type=weight_type,
             extra_options=extra_options,
         )
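The wrapper above runs static quantization when calibration data is supplied and falls back to dynamic (calibration-free, weight-only) quantization otherwise. A hypothetical call site:

# Static path: activation ranges are calibrated from the reader.
self.quantize_model('model_fp32.onnx', 'model_static_i8.onnx', data_reader=reader)
# Dynamic path: no reader, so only weights are pre-quantized.
self.quantize_model('model_fp32.onnx', 'model_dynamic_i8.onnx')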
Code Example #11
    def test_create_weight_matching(self):
        # Setup: create float model:
        float_model_path = str(Path(self._tmp_model_dir.name) / "float_model3.onnx")
        construct_test_model1(float_model_path, activations_as_outputs=False)

        # Setup: create qdq model:
        data_reader = TestDataReader()
        qdq_model_path = str(Path(self._tmp_model_dir.name) / "qdq_model3.onnx")
        quantize_static(
            float_model_path,
            qdq_model_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            per_channel=False,
            reduce_range=False,
            activation_type=QuantType.QInt8,
            weight_type=QuantType.QInt8,
        )

        # Call function under test and verify all weights are present
        matched_weights = create_weight_matching(float_model_path, qdq_model_path)
        weight_names = ["W1", "W3", "W5", "B1", "B3", "B5"]
        for weight_name in weight_names:
            float_array = matched_weights[weight_name]["float"]
            dq_array = matched_weights[weight_name]["dequantized"]
            self.assertEqual(float_array.shape, dq_array.shape)

        weights_error = compute_weight_error(matched_weights)
        for weight_name in weight_names:
            self.assertGreater(
                weights_error[weight_name],
                0.1,
                f"{weight_name} quantization error {weights_error[weight_name]} too big!",
            )
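The create_weight_matching / compute_weight_error helpers used here (and the activation-matching counterparts in a later example) ship with onnxruntime's QDQ debugging utilities. As a sketch, the imports would be along these lines (module path as in recent onnxruntime releases; treat it as an assumption for older versions):

from onnxruntime.quantization.qdq_loss_debug import (
    compute_activation_error,
    compute_weight_error,
    create_activation_matching,
    create_weight_matching,
)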
Code Example #12
File: test_qdq.py  Project: xkszltl/onnxruntime
    def verify_quantize_conv(self, has_bias, per_channel):
        np.random.seed(1)
        model_fp32_path = 'conv_fp32.{}.{}.onnx'.format(has_bias, per_channel)
        model_int8_qdq_path = 'conv_quant_qdq.{}.{}.onnx'.format(has_bias, per_channel)
        model_int8_qop_path = 'conv_quant_qop.{}.{}.onnx'.format(has_bias, per_channel)
        data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
        self.construct_model_conv(model_fp32_path,
                                  [1, 8, 33, 33],
                                  [16, 8, 3, 3],
                                  [1, 16, 31, 31],
                                  has_bias)
        quantize_static(model_fp32_path,
                        model_int8_qdq_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        per_channel=per_channel,
                        reduce_range=per_channel
                        )
        data_reader.rewind()
        qdq_nodes = {'Conv': 1, 'QuantizeLinear': 2, 'DequantizeLinear': 4 if has_bias else 3}
        check_op_type_count(self, model_int8_qdq_path, **qdq_nodes)
        check_model_correctness(self, model_fp32_path, model_int8_qdq_path, data_reader.get_next())

        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_int8_qop_path,
                        data_reader,
                        quant_format=QuantFormat.QOperator,
                        per_channel=per_channel,
                        reduce_range=per_channel
                        )
        data_reader.rewind()
        qop_nodes = {'QLinearConv': 1, 'QuantizeLinear': 1, 'DequantizeLinear': 1}
        check_op_type_count(self, model_int8_qop_path, **qop_nodes)
        check_model_correctness(self, model_fp32_path, model_int8_qop_path, data_reader.get_next())
Code Example #13
    def test_create_weight_matching_per_channel(self):

        # float model
        #         (input)
        #           |
        #          Add
        #       /   |   \
        #  MatMul MatMul MatMul
        #     |     |      |
        # (output)(output)(output)
        float_model_path = str(Path(self._tmp_model_dir.name) / "float_model4.onnx")
        initializers = []
        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [5, 5])
        output_tensor1 = helper.make_tensor_value_info("M", TensorProto.FLOAT, [5, 5])
        output_tensor2 = helper.make_tensor_value_info("N", TensorProto.FLOAT, [5, 5])
        output_tensor3 = helper.make_tensor_value_info("O", TensorProto.FLOAT, [5, 5])

        add_weight_data = np.random.normal(0, 0.1, [5, 5]).astype(np.float32)
        initializers.append(onnx.numpy_helper.from_array(add_weight_data, name="P"))
        matmul_weight_data_1 = np.random.normal(0, 0.1, [5, 5]).astype(np.float32)
        initializers.append(onnx.numpy_helper.from_array(matmul_weight_data_1, name="Q"))
        matmul_weight_data_2 = np.random.normal(0, 0.1, [5, 5]).astype(np.float32)
        initializers.append(onnx.numpy_helper.from_array(matmul_weight_data_2, name="R"))
        initializers.append(onnx.numpy_helper.from_array(matmul_weight_data_2, name="S"))

        add_node = onnx.helper.make_node("Add", ["input", "P"], ["T"], name="Add")
        matmul_node_1 = onnx.helper.make_node("MatMul", ["T", "Q"], ["M"], name="MatMul1")
        matmul_node_2 = onnx.helper.make_node("MatMul", ["T", "R"], ["N"], name="MatMul2")
        matmul_node_3 = onnx.helper.make_node("MatMul", ["T", "S"], ["O"], name="MatMul3")

        graph = helper.make_graph(
            [add_node, matmul_node_1, matmul_node_2, matmul_node_3],
            "QDQ_Test",
            [input_tensor],
            [output_tensor1, output_tensor2, output_tensor3],
            initializer=initializers,
        )
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
        onnx.save(model, float_model_path)

        # Setup: create qdq model:
        qdq_model_path = str(Path(self._tmp_model_dir.name) / "qdq_model4.onnx")
        quantize_static(
            float_model_path,
            qdq_model_path,
            TestDataReader([5, 5]),
            quant_format=QuantFormat.QDQ,
            per_channel=True,
            reduce_range=False,
            activation_type=QuantType.QInt8,
            weight_type=QuantType.QInt8,
        )

        # Call function under test and verify all weights are present
        matched_weights = create_weight_matching(float_model_path, qdq_model_path)
        weight_names = ["P", "Q", "R", "S"]
        for weight_name in weight_names:
            float_array = matched_weights[weight_name]["float"]
            dq_array = matched_weights[weight_name]["dequantized"]
            self.assertEqual(float_array.shape, dq_array.shape)
Code Example #14
def deploy_onnx_quantized(dataloader: DataLoader, model: nn.Module, fuse: bool,
                          name: str):
    model = deepcopy(model)
    path = f'./{name}'

    model = model.eval()
    if fuse:
        model.fuse()
        path += '_fused'

    float_path = path + '_float.onnx'
    quantized_path = path + '_quant.onnx'
    example_input = torch.rand(1, 3, 224, 224)
    torch.onnx.export(model=model,
                      args=(example_input,),
                      f=float_path,
                      input_names=['input_image'],
                      output_names=['logits'],
                      opset_version=12)
    onnx_q_loader = ONNXQuantizationDataReader(quant_loader=dataloader,
                                               input_name='input_image')
    quantize_static(model_input=float_path,
                    model_output=quantized_path,
                    calibration_data_reader=onnx_q_loader)

    avg_time = benchmark_onnx_model(rt.InferenceSession(float_path))
    size = Path(float_path).stat().st_size / 1e6
    print(
        f'Benchmarking {float_path}: Avg. inference@CPU: {avg_time:3.2f} ms, Size: {size:2.2f} MB'
    )
    avg_time = benchmark_onnx_model(rt.InferenceSession(quantized_path))
    size = Path(quantized_path).stat().st_size / 1e6
    print(
        f'Benchmarking {quantized_path}: Avg. inference@CPU: {avg_time:3.2f} ms, Size: {size:2.2f} MB'
    )
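benchmark_onnx_model and ONNXQuantizationDataReader are project-local helpers that are not shown. A plausible sketch of the benchmark, warming up and then averaging timed runs over a fixed random input (the name, run counts, and fixed input shape are assumptions):

import time
import numpy as np

def benchmark_onnx_model(session, runs=50, warmup=5):
    # Assumes the model was exported with a fixed input shape, e.g. (1, 3, 224, 224).
    inp = session.get_inputs()[0]
    data = np.random.rand(*inp.shape).astype(np.float32)
    for _ in range(warmup):
        session.run(None, {inp.name: data})
    start = time.perf_counter()
    for _ in range(runs):
        session.run(None, {inp.name: data})
    return (time.perf_counter() - start) / runs * 1000.0  # ms per inference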
Code Example #15
File: test_qdq.py  Project: xadupre/onnxruntime
    def verify(self, per_channel, is_quant_type_int8):
        np.random.seed(1)
        model_fp32_path = "conv_relu_fp32.{}.onnx".format(per_channel)
        model_int8_qdq_path = "conv_relu_quant_qdq.{}.onnx".format(per_channel)
        model_int8_qop_path = "conv_relu_quant_qop.{}.onnx".format(per_channel)
        data_reader = self.input_feeds(1, {"input": [1, 8, 33, 33]})
        self.construct_model_conv_relu(model_fp32_path, [1, 8, 33, 33],
                                       [16, 8, 3, 3], [1, 16, 31, 31])
        quantize_static(
            model_fp32_path,
            model_int8_qdq_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            per_channel=per_channel,
            reduce_range=per_channel,
            activation_type=QuantType.QInt8
            if is_quant_type_int8 else QuantType.QUInt8,
            weight_type=QuantType.QInt8
            if is_quant_type_int8 else QuantType.QUInt8,
        )
        data_reader.rewind()
        # topo sort check
        check_op_type_order(
            self,
            model_int8_qdq_path,
            [
                "DequantizeLinear",
                "QuantizeLinear",
                "DequantizeLinear",
                "Conv",
                "QuantizeLinear",
                "DequantizeLinear",
            ],
        )
        check_model_correctness(self, model_fp32_path, model_int8_qdq_path,
                                data_reader.get_next())

        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_int8_qop_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            per_channel=per_channel,
            reduce_range=per_channel,
            activation_type=QuantType.QInt8
            if is_quant_type_int8 else QuantType.QUInt8,
            weight_type=QuantType.QInt8
            if is_quant_type_int8 else QuantType.QUInt8,
        )
        data_reader.rewind()
        qop_nodes = {
            "QLinearConv": 1,
            "QuantizeLinear": 1,
            "DequantizeLinear": 1
        }
        check_op_type_count(self, model_int8_qop_path, **qop_nodes)
        check_model_correctness(self, model_fp32_path, model_int8_qop_path,
                                data_reader.get_next())
Code Example #16
File: test_op_pad.py  Project: yuto51942/onnxruntime
 def quantize_model(self, model_fp32_path, model_i8_path, data_reader=None):
     if data_reader is not None:
         quantize_static(model_fp32_path,
                         model_i8_path,
                         data_reader,
                         reduce_range=True)
     else:
         quantize_dynamic(model_fp32_path, model_i8_path, reduce_range=True)
Code Example #17
    def test_create_activation_matching_present(self):
        float_model_path = str(Path(self._tmp_model_dir.name) / "float_model2.onnx")
        construct_test_model1(float_model_path, activations_as_outputs=False)
        data_reader = TestDataReader()

        qdq_model_path = str(Path(self._tmp_model_dir.name) / "qdq_model2.onnx")
        quantize_static(
            float_model_path,
            qdq_model_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            per_channel=False,
            reduce_range=False,
            activation_type=QuantType.QInt8,
            weight_type=QuantType.QInt8,
        )

        data_reader.rewind()
        augmented_float_model_path = str(Path(self._tmp_model_dir.name).joinpath("augmented_float_model2.onnx"))
        float_activations = augment_model_collect_activations(float_model_path, augmented_float_model_path, data_reader)

        data_reader.rewind()
        augmented_qdq_model_path = str(Path(self._tmp_model_dir.name).joinpath("augmented_qdq_model2.onnx"))
        qdq_activations = augment_model_collect_activations(qdq_model_path, augmented_qdq_model_path, data_reader)

        compare_dict = create_activation_matching(qdq_activations, float_activations)

        # 'Conv1Out' is combined with 'Relu2Out'
        tensor_names = [
            "Relu1Out",
            "Relu2Out",
            "Conv2Out",
            "Conv3Out",
            "AddOut",
        ]
        for tensor_name in tensor_names:
            self.assertTrue(compare_dict[tensor_name]["float"])
            self.assertTrue(compare_dict[tensor_name]["pre_qdq"])
            self.assertTrue(compare_dict[tensor_name]["post_qdq"])

        self.assertFalse(compare_dict.get("Conv1Out"))

        activations_error = compute_activation_error(compare_dict)
        for tensor_name in tensor_names:
            self.assertGreater(
                activations_error[tensor_name]["xmodel_err"],
                0.01,
                f"{tensor_name} cross model error {activations_error[tensor_name]['xmodel_err']} exceeds threashold.",
            )
            self.assertGreater(
                activations_error[tensor_name]["qdq_err"],
                0.01,
                f"{tensor_name} qdq error {activations_error[tensor_name]['qdq_err']} exceeds threashold.",
            )
Code Example #18
 def static_quant_test(self, model_fp32_path, model_int8_path):
     data_reader = self.input_feeds(1, {'input': [5, 10]})
     quantize_static(model_fp32_path,
                     model_int8_path,
                     data_reader)
     data_reader.rewind()
     quant_nodes = {'QLinearMatMul' : 2,
                    'QLinearAdd' : 2,
                    'QuantizeLinear' : 1,
                    'DequantizeLinear' : 1}
     check_op_type_count(self, model_int8_path, **quant_nodes)
     check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next())
Code Example #19
    def quantize_gavgpool_test(self,
                               activation_type,
                               weight_type,
                               extra_options={}):
        np.random.seed(1)
        model_fp32_path = "gavg_pool_fp32.onnx"
        data_reader = self.input_feeds(1, {"input": [1, 8, 33, 33]})
        self.construct_model_gavgpool(model_fp32_path, [1, 8, 33, 33],
                                      [16, 8, 3, 3], [1, 16, 1, 1])

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if (activation_type
                                       == QuantType.QUInt8) else "s8"
        weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
        model_q8_path = "gavg_pool_{}{}.onnx".format(activation_type_str,
                                                     weight_type_str)

        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_q8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        quant_nodes = {
            "QLinearConv": 1,
            "GlobalAveragePool": 1,
            "QLinearGlobalAveragePool": 1,
            "QuantizeLinear": 1,
            "DequantizeLinear": 1,
        }
        check_op_type_count(self, model_q8_path, **quant_nodes)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        qnode_io_qtypes.update({
            "QLinearGlobalAveragePool": [
                ["i", 2, activation_proto_qtype],
                ["i", 4, activation_proto_qtype],
            ]
        })
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path,
                                data_reader.get_next())
Code Example #20
 def static_quant_test_qdq(self, model_fp32_path, model_int8_path):
     data_reader = self.input_feeds(1, {'input': [5, 10]})
     quantize_static(model_fp32_path,
                     model_int8_path,
                     data_reader,
                     quant_format=QuantFormat.QDQ)
     data_reader.rewind()
     quant_nodes = {'MatMul' : 2,
                    'Add' : 2,
                    'QuantizeLinear' : 4,
                    'DequantizeLinear' : 8}
     check_op_type_count(self, model_int8_path, **quant_nodes)
     check_model_correctness(self, model_fp32_path, model_int8_path, data_reader.get_next())
Code Example #21
File: run.py  Project: etsangsplk/onnxruntime
def main():
    args = get_args()
    input_model_path = args.input_model
    output_model_path = args.output_model
    calibration_dataset_path = args.calibrate_dataset
    dr = ResNet50DataReader(calibration_dataset_path)
    quantize_static(input_model_path, output_model_path, dr)
    print('Calibrated and quantized model saved.')

    print('benchmarking fp32 model...')
    benchmark(input_model_path)

    print('benchmarking int8 model...')
    benchmark(output_model_path)
Code Example #22
    def static_quant_test(
        self,
        model_fp32_path,
        data_reader,
        activation_type,
        weight_type,
        extra_options={},
    ):
        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if (activation_type
                                       == QuantType.QUInt8) else "s8"
        weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
        model_int8_path = "relu_fp32.quant_{}{}.onnx".format(
            activation_type_str, weight_type_str)

        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_int8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        qdq_count = 1 if activation_type == QuantType.QUInt8 else 2
        relu_count = 0 if activation_type == QuantType.QUInt8 else 1
        quant_nodes = {
            "QGemm": 2,
            "QuantizeLinear": qdq_count,
            "DequantizeLinear": qdq_count,
            "Relu": relu_count
        }
        check_op_type_count(self, model_int8_path, **quant_nodes)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        qnode_io_qtypes.update(
            {"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_int8_path,
                                data_reader.get_next())
Code Example #23
    def test_quantize_maxpool(self):
        np.random.seed(1)

        model_fp32_path = 'maxpool_fp32.onnx'
        model_uint8_path = 'maxpool_uint8.onnx'
        model_uint8_qdq_path = 'maxpool_uint8_qdq.onnx'

        self.construct_model_conv_maxpool(model_fp32_path, [1, 2, 26, 42],
                                          [3, 2, 3, 3], [1, 3, 24, 40],
                                          {'kernel_shape': [3, 3]},
                                          [1, 3, 22, 38])

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})
        quantize_static(model_fp32_path, model_uint8_path, data_reader)

        # make sure MaxPool consumes the quantized (int8) tensor; its input name reveals this
        check_op_nodes(
            self, model_uint8_path, lambda node:
            (node.name != "maxpool_node" or node.input[0] != 'conv_output'))
        qnode_counts = {
            'QLinearConv': 1,
            'QuantizeLinear': 1,
            'DequantizeLinear': 2,
            'MaxPool': 1
        }
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_uint8_qdq_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ)
        qdqnode_counts = {
            'Conv': 1,
            'QuantizeLinear': 2,
            'DequantizeLinear': 3,
            'MaxPool': 1
        }
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path,
                                data_reader.get_next())
Code Example #24
    def static_quant_test_qdq(
        self,
        model_fp32_path,
        data_reader,
        activation_type,
        weight_type,
        extra_options={},
    ):
        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if (activation_type
                                       == QuantType.QUInt8) else "s8"
        weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
        model_int8_path = "gemm_fp32.quant_dqd_{}{}.onnx".format(
            activation_type_str, weight_type_str)

        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_int8_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        clip_count = 0 if activation_type == QuantType.QUInt8 else 1
        q_count = 3 if activation_type == QuantType.QUInt8 else 4
        dq_count = 7 if activation_type == QuantType.QUInt8 else 8
        quant_nodes = {
            "Gemm": 2,
            "QuantizeLinear": q_count,
            "DequantizeLinear": dq_count,
            "Clip": clip_count
        }
        check_op_type_count(self, model_int8_path, **quant_nodes)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_int8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_int8_path,
                                data_reader.get_next())
Code Example #25
    def quantize_gavgpool_test(self,
                               activation_type,
                               weight_type,
                               extra_options={}):
        np.random.seed(1)
        model_fp32_path = 'gavg_pool_fp32.onnx'
        data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
        self.construct_model_gavgpool(model_fp32_path, [1, 8, 33, 33],
                                      [16, 8, 3, 3], [1, 16, 1, 1])

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = 'u8' if (activation_type
                                       == QuantType.QUInt8) else 's8'
        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
        model_q8_path = 'gavg_pool_{}{}.onnx'.format(activation_type_str,
                                                     weight_type_str)

        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_q8_path,
                        data_reader,
                        quant_format=QuantFormat.QOperator,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)

        quant_nodes = {
            'QLinearConv': 1,
            'GlobalAveragePool': 1,
            'QLinearGlobalAveragePool': 1,
            'QuantizeLinear': 1,
            'DequantizeLinear': 1
        }
        check_op_type_count(self, model_q8_path, **quant_nodes)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        qnode_io_qtypes.update({
            'QLinearGlobalAveragePool': [['i', 2, activation_proto_qtype],
                                         ['i', 4, activation_proto_qtype]]
        })
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path,
                                data_reader.get_next())
Code Example #26
    def test_quantize_relu_conv(self):
        float_model_path = str(
            Path(self._tmp_model_dir.name) / "float_relu_convs_model.onnx")
        construct_relu_conv_model(float_model_path)
        data_reader = self.input_feeds(2, {"input": [1, 3, 1, 3]})

        qdq_model_path = str(
            Path(self._tmp_model_dir.name) / "qdq_relu_convs_model.onnx")
        quantize_static(
            float_model_path,
            qdq_model_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            per_channel=False,
            reduce_range=False,
            activation_type=QuantType.QInt8,
            weight_type=QuantType.QInt8,
        )
Code Example #27
    def test_activation_only(self):
        float_model_path = str(
            Path(self._tmp_model_dir.name) / "float_relu_convs_model.onnx")
        self.construct_model_clip_relu(float_model_path, [1, 3, 1, 3],
                                       [1, 3, 1, 3])
        data_reader = self.input_feeds(2, {"input": [1, 3, 1, 3]})

        qdq_model_path = str(
            Path(self._tmp_model_dir.name) / "qdq_relu_convs_model.onnx")
        quantize_static(float_model_path, qdq_model_path, data_reader)

        qop_nodes = {
            "Clip": 1,
            "Relu": 1,
            "QuantizeLinear": 0,
            "DequantizeLinear": 0
        }
        check_op_type_count(self, qdq_model_path, **qop_nodes)
Code Example #28
def main():
    args = get_args()
    input_model_path = args.input_model
    output_model_path = args.output_model
    calibration_dataset_path = args.calibrate_dataset
    dr = ResNet50DataReader(calibration_dataset_path)
    quantize_static(input_model_path,
                    output_model_path,
                    dr,
                    quant_format=args.quant_format,
                    per_channel=args.per_channel,
                    weight_type=QuantType.QInt8)
    print('Calibrated and quantized model saved.')

    print('benchmarking fp32 model...')
    benchmark(input_model_path)

    print('benchmarking int8 model...')
    benchmark(output_model_path)
Code Example #29
def quantize_and_save_model(name,
                            input,
                            model,
                            act_type="uint8",
                            wt_type="uint8",
                            per_channel=False):
    float_model_path = os.path.join("models", "dummy.onnx")
    quantized_model_path = os.path.join("models", name + ".onnx")
    type_dict = {"uint8": QuantType.QUInt8, "int8": QuantType.QInt8}

    model.eval()
    torch.onnx.export(model,
                      input,
                      float_model_path,
                      export_params=True,
                      opset_version=12)

    dr = DataReader(float_model_path)
    quantize_static(float_model_path,
                    quantized_model_path,
                    dr,
                    per_channel=per_channel,
                    activation_type=type_dict[act_type],
                    weight_type=type_dict[wt_type])

    os.remove(float_model_path)
    os.remove(os.path.join("models", "dummy-opt.onnx"))
    os.remove("augmented_model.onnx")

    sess = rt.InferenceSession(quantized_model_path, None)
    input = np.random.uniform(-1, 1,
                              sess.get_inputs()[0].shape).astype("float32")
    output = sess.run([sess.get_outputs()[0].name],
                      {sess.get_inputs()[0].name: input})[0]

    print(name + " input has sizes", input.shape)
    input_files = os.path.join("data", "input_" + name)
    np.save(input_files, input.data)

    print(name + " output has sizes", output.shape)
    output_files = os.path.join("data", "output_" + name)
    np.save(output_files, np.ascontiguousarray(output.data))
Code Example #30
    def test_save_as_external(self):
        data_reader = InputFeedsNegOneZeroOne(
            10, {"input": [1, self._channel_size, 1, 3]})
        for use_external_data_format in [True, False]:
            quant_model_path = str(
                Path(self._tmp_model_dir.name) /
                f"quant.{use_external_data_format}.onnx")
            quantize_static(
                self._model_fp32_path,
                quant_model_path,
                data_reader,
                activation_type=QuantType.QUInt8,
                weight_type=QuantType.QUInt8,
                use_external_data_format=use_external_data_format,
            )

            data_reader.rewind()
            check_model_correctness(self, self._model_fp32_path,
                                    quant_model_path, data_reader.get_next())
            data_reader.rewind()