Ejemplo n.º 1
0
    def quantize_softmax_test(self, activation_type, weight_type, extra_options=None):
        """Quantize a Conv + Softmax model and verify the result.

        The fp32 model is quantized twice -- once in QOperator format and once
        in QDQ format -- and each output model is checked for the expected
        node counts, the expected quantized tensor types, and numerical
        agreement with the fp32 model.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(1)
        model_fp32_path = "softmax_fp32.onnx"
        self.construct_model_conv_softmax(
            model_fp32_path,
            [1, 2, 26, 42],
            [3, 2, 3, 3],
            [1, 3, 24, 40],
            {"axis": -2},
            [1, 3, 24, 40],
        )
        data_reader = self.input_feeds(1, {"input": [1, 2, 26, 42]})

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if activation_type == QuantType.QUInt8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_q8_path = f"softmax_{activation_type_str}{weight_type_str}.onnx"
        model_q8_qdq_path = f"softmax_qdq_{activation_type_str}{weight_type_str}.onnx"

        # Verify QOperator mode
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_q8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        qnode_counts = {
            "QLinearConv": 1,
            "QuantizeLinear": 1,
            "DequantizeLinear": 2,
            "QLinearSoftmax": 1,
            "Softmax": 0,
        }
        check_op_type_count(self, model_q8_path, **qnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        qnode_io_qtypes.update({
            "QLinearConv": [
                ["i", 2, activation_proto_qtype],
                ["i", 7, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        })
        # Shape inference does not work on custom (contrib) ops, so only the
        # input zero-point of QLinearSoftmax can be checked here.
        qnode_io_qtypes.update({
            "QLinearSoftmax": [["i", 4, activation_proto_qtype]]
        })
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_q8_qdq_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        qdqnode_counts = {
            "Conv": 1,
            "QuantizeLinear": 3,
            "DequantizeLinear": 4,
            "Softmax": 1,
        }
        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_qdq_path,
                                data_reader.get_next())
Ejemplo n.º 2
0
    def quantize_reshape_test(self, activation_type, weight_type, extra_options=None):
        """Quantize a MatMul + Reshape model in QOperator and QDQ formats.

        Verifies that Reshape consumes the quantized MatMul output directly
        (no dequantize in between), that node counts and quantized tensor
        types are as expected, and that both quantized models agree
        numerically with the fp32 model.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(1)
        model_fp32_path = "reshape_fp32.onnx"

        self.construct_model_matmul_reshape(model_fp32_path, [3, 7], [7, 3], [1, 9])

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if activation_type == QuantType.QUInt8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_uint8_path = "reshape_{}{}.onnx".format(activation_type_str, weight_type_str)
        model_uint8_qdq_path = "reshape_{}{}_qdq.onnx".format(activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {"input": [3, 7]})
        quantize_static(
            model_fp32_path,
            model_uint8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        # Reshape must now read a quantized tensor; its input name reveals
        # whether it still consumes the fp32 "matmul_output".
        check_op_nodes(
            self,
            model_uint8_path,
            lambda node: (node.name != "reshape_node" or node.input[0] != "matmul_output"),
        )
        qnode_counts = {
            "QLinearMatMul": 1,
            "QuantizeLinear": 1,
            "DequantizeLinear": 1,
            "Reshape": 1,
        }
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        qnode_io_qtypes.update({"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_uint8_qdq_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        qdqnode_counts = {
            "MatMul": 1,
            "QuantizeLinear": 3,
            "DequantizeLinear": 4,
            "Reshape": 1,
        }
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
Ejemplo n.º 3
0
    def quantize_avgpool_test(self, activation_type, weight_type, extra_options=None):
        """Quantize a Conv + AveragePool model in QOperator and QDQ formats.

        Checks node counts, quantized tensor types, and numerical agreement
        with the fp32 model for both formats.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(1)
        model_fp32_path = 'avgpool_fp32.onnx'
        self.construct_model_conv_avgpool(model_fp32_path, [1, 2, 26, 42],
                                          [3, 2, 3, 3], [1, 3, 24, 40],
                                          {'kernel_shape': [3, 3]},
                                          [1, 3, 22, 38])
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = 'u8' if activation_type == QuantType.QUInt8 else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_q8_path = 'avgpool_{}{}.onnx'.format(activation_type_str,
                                                   weight_type_str)
        model_q8_qdq_path = 'avgpool_qdq_{}{}.onnx'.format(
            activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_q8_path,
                        data_reader,
                        quant_format=QuantFormat.QOperator,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        qnode_counts = {
            'QLinearConv': 1,
            'QuantizeLinear': 1,
            'DequantizeLinear': 2,
            'QLinearAveragePool': 1
        }
        check_op_type_count(self, model_q8_path, **qnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        qnode_io_qtypes.update({
            'QLinearConv': [['i', 2, activation_proto_qtype],
                            ['i', 7, activation_proto_qtype],
                            ['o', 0, activation_proto_qtype]]
        })
        # Shape inference does not work on custom (contrib) ops, so only the
        # input zero-point of QLinearAveragePool can be checked here.
        qnode_io_qtypes.update({
            'QLinearAveragePool': [['i', 4, activation_proto_qtype]]
        })
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_q8_qdq_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        qdqnode_counts = {
            'Conv': 1,
            'QuantizeLinear': 3,
            'DequantizeLinear': 4,
            'AveragePool': 1
        }
        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_qdq_path,
                                data_reader.get_next())
Ejemplo n.º 4
0
    def quantize_resize_test(self, activation_type, weight_type, extra_options=None):
        """Quantize a Conv + Resize model in QOperator and QDQ formats.

        Verifies that Resize consumes the quantized Conv output, that node
        counts and quantized tensor types are as expected, and that both
        quantized models agree numerically with the fp32 model.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(1)
        model_fp32_path = "resize_fp32.onnx"

        kwargs = {
            "coordinate_transformation_mode": "asymmetric",
            "mode": "nearest",
            "nearest_mode": "floor",
        }
        self.construct_model_conv_resize(
            model_fp32_path,
            [1, 2, 26, 42],
            [3, 2, 3, 3],
            [1, 3, 24, 40],
            [1, 3, 48, 80],
            kwargs,
            [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
            [1.0, 1.0, 2.0, 2.0],
            None,
        )

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if activation_type == QuantType.QUInt8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_uint8_path = "resize_{}{}.onnx".format(activation_type_str,
                                                     weight_type_str)
        model_uint8_qdq_path = "resize_{}{}_qdq.onnx".format(
            activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {"input": [1, 2, 26, 42]})
        quantize_static(
            model_fp32_path,
            model_uint8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        # Resize must now read a quantized tensor; its input name reveals
        # whether it still consumes the fp32 "conv_output".
        check_op_nodes(
            self,
            model_uint8_path,
            lambda node:
            (node.name != "resize_node" or node.input[0] != "conv_output"),
        )
        qnode_counts = {
            "QLinearConv": 1,
            "QuantizeLinear": 1,
            "DequantizeLinear": 2,
            "Resize": 1,
        }
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        qnode_io_qtypes.update(
            {"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_uint8_qdq_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        qdqnode_counts = {
            "Conv": 1,
            "QuantizeLinear": 3,
            "DequantizeLinear": 4,
            "Resize": 1,
        }
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path,
                                data_reader.get_next())
Ejemplo n.º 5
0
    def quantize_maxpool_test(self, activation_type, weight_type, extra_options=None):
        """Quantize a Conv + MaxPool model in QOperator and QDQ formats.

        Verifies that MaxPool consumes the quantized Conv output, that node
        counts and quantized tensor types are as expected, and that both
        quantized models agree numerically with the fp32 model.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(1)
        model_fp32_path = 'maxpool_fp32.onnx'
        self.construct_model_conv_maxpool(model_fp32_path, [1, 2, 26, 42],
                                          [3, 2, 3, 3], [1, 3, 24, 40],
                                          {'kernel_shape': [3, 3]},
                                          [1, 3, 22, 38])
        data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = 'u8' if activation_type == QuantType.QUInt8 else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_q8_path = 'maxpool_{}{}.onnx'.format(activation_type_str,
                                                   weight_type_str)
        # Filename typo fixed: was 'maxpool_dqd_...'; the path is only used
        # locally, so the rename is safe.
        model_q8_qdq_path = 'maxpool_qdq_{}{}.onnx'.format(
            activation_type_str, weight_type_str)

        # Verify QOperator mode. quant_format is passed explicitly (the other
        # tests in this file do the same) so the assertions below do not
        # depend on quantize_static's default format.
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_q8_path,
                        data_reader,
                        quant_format=QuantFormat.QOperator,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        # MaxPool must now read a quantized tensor; its input name reveals
        # whether it still consumes the fp32 'conv_output'.
        check_op_nodes(
            self, model_q8_path, lambda node:
            (node.name != "maxpool_node" or node.input[0] != 'conv_output'))
        qnode_counts = {
            'QLinearConv': 1,
            'QuantizeLinear': 1,
            'DequantizeLinear': 2,
            'MaxPool': 1
        }
        check_op_type_count(self, model_q8_path, **qnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        qnode_io_qtypes.update(
            {'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_q8_qdq_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        qdqnode_counts = {
            'Conv': 1,
            'QuantizeLinear': 3,
            'DequantizeLinear': 4,
            'MaxPool': 1
        }
        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        qnode_io_qtypes.update(
            {'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_qdq_path,
                                data_reader.get_next())
Ejemplo n.º 6
0
    def run_quantize_squeezes_of_opset(
        self,
        opset=13,
        activation_type=QuantType.QUInt8,
        weight_type=QuantType.QUInt8,
        extra_options=None,
    ):
        """Quantize a Conv + Squeeze/Unsqueeze model built at a given opset.

        Both QOperator and QDQ formats are produced and checked for expected
        node counts, quantized tensor types, and numerical agreement with the
        fp32 model (with loose tolerances, since the graph chains three convs).

        Args:
            opset: ONNX opset version used to build the fp32 model.
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(1)

        model_fp32_path = "squeezes_opset{}_fp32.onnx".format(opset)
        self.construct_model_conv_squeezes(model_fp32_path, [1, 2, 26, 42],
                                           [3, 2, 3, 3], [1, 3, 24, 40],
                                           opset=opset)

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if activation_type == QuantType.QUInt8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_uint8_path = "squeezes_opset{}_{}{}.onnx".format(
            opset, activation_type_str, weight_type_str)
        model_uint8_qdq_path = "squeezes_opset{}_{}{}_qdq.onnx".format(
            opset, activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {"input": [1, 2, 26, 42]})
        quantize_static(
            model_fp32_path,
            model_uint8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        # A single Quantize/Dequantize pair means the squeezes run on
        # quantized tensors rather than being wrapped individually.
        qnode_counts = {"QuantizeLinear": 1, "DequantizeLinear": 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        qnode_io_qtypes.update(
            {"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(
            self,
            model_fp32_path,
            model_uint8_path,
            data_reader.get_next(),
            rtol=0.01,
            atol=0.5,
        )

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_uint8_qdq_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        qdqnode_counts = {
            "Conv": 3,
            "QuantizeLinear": 9,
            "DequantizeLinear": 12
        }
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(
            self,
            model_fp32_path,
            model_uint8_qdq_path,
            data_reader.get_next(),
            rtol=0.01,
            atol=0.5,
        )
Ejemplo n.º 7
0
    def quantize_reshape_test(self, activation_type, weight_type, extra_options=None):
        """Quantize a MatMul + Reshape model in QOperator and QDQ formats.

        Verifies that Reshape consumes the quantized MatMul output directly,
        that node counts and quantized tensor types are as expected, and that
        both quantized models agree numerically with the fp32 model.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(1)
        model_fp32_path = 'reshape_fp32.onnx'

        self.construct_model_matmul_reshape(model_fp32_path, [3, 7], [7, 3],
                                            [1, 9])

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = 'u8' if activation_type == QuantType.QUInt8 else 's8'
        weight_type_str = 'u8' if weight_type == QuantType.QUInt8 else 's8'
        model_uint8_path = 'reshape_{}{}.onnx'.format(activation_type_str,
                                                      weight_type_str)
        model_uint8_qdq_path = 'reshape_{}{}_qdq.onnx'.format(
            activation_type_str, weight_type_str)

        # Verify QOperator mode. quant_format is passed explicitly (the other
        # tests in this file do the same) so the assertions below do not
        # depend on quantize_static's default format.
        data_reader = self.input_feeds(1, {'input': [3, 7]})
        quantize_static(model_fp32_path,
                        model_uint8_path,
                        data_reader,
                        quant_format=QuantFormat.QOperator,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        # Reshape must now read a quantized tensor; its input name reveals
        # whether it still consumes the fp32 'matmul_output'.
        check_op_nodes(
            self, model_uint8_path, lambda node:
            (node.name != "reshape_node" or node.input[0] != 'matmul_output'))
        qnode_counts = {
            'QLinearMatMul': 1,
            'QuantizeLinear': 1,
            'DequantizeLinear': 1,
            'Reshape': 1
        }
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        qnode_io_qtypes.update(
            {'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_uint8_qdq_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        qdqnode_counts = {
            'MatMul': 1,
            'QuantizeLinear': 3,
            'DequantizeLinear': 4,
            'Reshape': 1
        }
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path,
                                data_reader.get_next())
Ejemplo n.º 8
0
    def quantize_concat_test(self, activation_type, weight_type, extra_options=None):
        """Quantize a multi-Conv + Concat model in QOperator and QDQ formats.

        Uses an input feed restricted to {-1, 0, 1} and checks node counts,
        quantized tensor types, and numerical agreement with the fp32 model.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(1)
        model_fp32_path = "concat_fp32.onnx"
        self.construct_model(model_fp32_path)
        data_reader = InputFeedsNegOneZeroOne(1, {"input": [1, 3, 15, 15]})

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if activation_type == QuantType.QUInt8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_q8_path = "concat_{}{}.onnx".format(activation_type_str, weight_type_str)
        model_q8_qdq_path = "concat_{}{}_qdq.onnx".format(activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_q8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )

        qnode_counts = {
            "QLinearConv": 3,
            "QuantizeLinear": 1,
            "DequantizeLinear": 1,
            "QLinearConcat": 1,
        }
        check_op_type_count(self, model_q8_path, **qnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        # QLinearConcat zero-points sit at inputs 1, 4, 7 (one per branch).
        qnode_io_qtypes.update(
            {
                "QLinearConcat": [
                    ["i", 1, activation_proto_qtype],
                    ["i", 4, activation_proto_qtype],
                    ["i", 7, activation_proto_qtype],
                ]
            }
        )
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_q8_qdq_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        qdqnode_counts = {
            "Conv": 3,
            "QuantizeLinear": 5,
            "DequantizeLinear": 8,
            "Concat": 1,
        }
        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
Ejemplo n.º 9
0
    def verify_quantize_with_pad_mode(
        self,
        pad_mode,
        constant_value=None,
        quantize_mode="static",
        rtol=0.01,
        atol=0.05,
        activation_type=QuantType.QUInt8,
        weight_type=QuantType.QUInt8,
        extra_options=None,
    ):
        """Quantize a Conv + Pad model and verify node counts, types, and accuracy.

        Args:
            pad_mode: Pad mode attribute for the model, or None to omit it.
            constant_value: optional constant_value input for "constant" pads.
            quantize_mode: "static" (calibrated) or dynamic quantization.
            rtol/atol: tolerances for the fp32-vs-quantized comparison.
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to the quantizer.
        """
        # None default avoids sharing one mutable dict across calls.
        extra_options = {} if extra_options is None else extra_options
        np.random.seed(108)
        tag_pad_mode = pad_mode if pad_mode is not None else "none"
        tag_constant_value = "" if constant_value is None else "_value"
        model_fp32_path = "qop_pad_{}_fp32_{}{}.onnx".format(quantize_mode, tag_pad_mode, tag_constant_value)
        data_reader = self.input_feeds(1, {"input": [1, 8, 33, 33]})
        self.construct_model_conv_pad(
            model_fp32_path,
            [1, 8, 33, 33],
            [16, 8, 3, 3],
            [1, 16, 31, 31],
            pad_mode,
            [0, 0, 1, 2, 0, 0, 3, 4],
            constant_value=constant_value,
        )

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if activation_type == QuantType.QUInt8 else "s8"
        weight_type_str = "u8" if weight_type == QuantType.QUInt8 else "s8"
        model_i8_path = "qop_pad_{}_i8_{}{}_{}{}.onnx".format(
            quantize_mode,
            tag_pad_mode,
            tag_constant_value,
            activation_type_str,
            weight_type_str,
        )
        data_reader.rewind()
        self.quantize_model(
            model_fp32_path,
            model_i8_path,
            None if quantize_mode != "static" else data_reader,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        # DequantizeLinear == 2 means there is one DequantizeLinear node after
        # both Conv and Pad, i.e. the Pad node runs with quantized semantics.
        # In dynamic quantize mode the Pad operator is in fact not quantized,
        # because its input stays fp32.
        if quantize_mode != "static":
            kwargs = {"DynamicQuantizeLinear": 1} if activation_type == QuantType.QUInt8 else {"QuantizeLinear": 1}
        else:
            kwargs = {"DequantizeLinear": 2, "QuantizeLinear": 1}
        check_op_type_count(self, model_i8_path, **kwargs)
        # Check node input/output types only for node types present in the graph.
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        qnode_io_qtypes.update({"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
        qnode_io_qtypes.update({"ConvInteger": [["i", 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_i8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(
            self,
            model_fp32_path,
            model_i8_path,
            data_reader.get_next(),
            rtol=rtol,
            atol=atol,
        )
Ejemplo n.º 10
0
    def quantize_argmax_test(self,
                             activation_type,
                             weight_type,
                             extra_options={}):
        np.random.seed(1)
        model_fp32_path = "argmax_fp32.onnx"

        self.construct_model_argmax(model_fp32_path, [1, 256, 128, 128],
                                    [1, 32, 128])

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = "u8" if (activation_type
                                       == QuantType.QUInt8) else "s8"
        weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
        model_uint8_path = "argmax_{}{}.onnx".format(activation_type_str,
                                                     weight_type_str)
        model_uint8_qdq_path = "argmax_{}{}_qdq.onnx".format(
            activation_type_str, weight_type_str)
        model_uint8_qdq_trt_path = "argmax_{}{}_qdq_trt.onnx".format(
            activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {"input": [1, 256, 128, 128]})
        quantize_static(
            model_fp32_path,
            model_uint8_path,
            data_reader,
            quant_format=QuantFormat.QOperator,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        # make sure argmax become xint8 operator, its input name could tell that
        check_op_nodes(
            self,
            model_uint8_path,
            lambda node: not (node.name == "argmax_node" and node.input[0] ==
                              "conv_output"),
        )
        qnode_counts = {"QuantizeLinear": 1, "QLinearConv": 1, "ArgMax": 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_uint8_qdq_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
        )
        qdqnode_counts = {
            "QuantizeLinear": 2,
            "DequantizeLinear": 3,
            "ArgMax": 1
        }
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path,
                                data_reader.get_next())

        # Verify QDQ mode for TensorRT
        data_reader.rewind()
        quantize_static(
            model_fp32_path,
            model_uint8_qdq_trt_path,
            data_reader,
            quant_format=QuantFormat.QDQ,
            activation_type=activation_type,
            weight_type=weight_type,
            extra_options=extra_options,
            op_types_to_quantize=["ArgMax"],
        )
        qdqnode_counts = {
            "QuantizeLinear": 1,
            "DequantizeLinear": 1,
            "ArgMax": 1
        }
        check_op_type_count(self, model_uint8_qdq_trt_path, **qdqnode_counts)
        qnode_io_qtypes = {
            "QuantizeLinear": [
                ["i", 2, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
        check_qtype_by_node_type(self, model_uint8_qdq_trt_path,
                                 qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self,
                                model_fp32_path, model_uint8_qdq_trt_path,
                                data_reader.get_next())
Ejemplo n.º 11
0
    def verify_quantize_with_pad_mode(self,
                                      pad_mode,
                                      constant_value=None,
                                      quantize_mode='static',
                                      rtol=0.01,
                                      atol=0.05,
                                      activation_type=QuantType.QUInt8,
                                      weight_type=QuantType.QUInt8,
                                      extra_options=None):
        """Quantize a Conv->Pad model and verify node counts, tensor types, and accuracy.

        Args:
            pad_mode: value for the Pad node's ``mode`` attribute, or None for no mode.
            constant_value: optional constant-fill value for the Pad node.
            quantize_mode: 'static' uses a calibration data reader; anything else
                quantizes dynamically (no reader).
            rtol, atol: tolerances for the fp32-vs-quantized correctness check.
            activation_type, weight_type: quantization types for activations/weights.
            extra_options: optional dict forwarded to the quantizer.
        """
        # NOTE: default was a mutable dict ({}); use None sentinel to avoid
        # sharing one dict instance across calls.
        if extra_options is None:
            extra_options = {}
        np.random.seed(108)
        tag_pad_mode = pad_mode if pad_mode is not None else 'none'
        tag_constant_value = '' if constant_value is None else '_value'
        model_fp32_path = 'qop_pad_{}_fp32_{}{}.onnx'.format(
            quantize_mode, tag_pad_mode, tag_constant_value)
        data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
        self.construct_model_conv_pad(model_fp32_path, [1, 8, 33, 33],
                                      [16, 8, 3, 3], [1, 16, 31, 31],
                                      pad_mode, [0, 0, 1, 2, 0, 0, 3, 4],
                                      constant_value=constant_value)

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = 'u8' if (activation_type
                                       == QuantType.QUInt8) else 's8'
        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
        model_i8_path = 'qop_pad_{}_i8_{}{}_{}{}.onnx'.format(
            quantize_mode, tag_pad_mode, tag_constant_value,
            activation_type_str, weight_type_str)
        data_reader.rewind()
        self.quantize_model(model_fp32_path,
                            model_i8_path,
                            None if quantize_mode != 'static' else data_reader,
                            activation_type=activation_type,
                            weight_type=weight_type,
                            extra_options=extra_options)
        # DequantizeLinear=2 means there is one DequantizeLinear node after both conv and pad,
        # which means the pad node is running in quantized semantics.
        # In dynamic quantize mode, the pad operator is in fact not quantized as its input is fp32.
        if quantize_mode != 'static':
            kwargs = {
                'DynamicQuantizeLinear': 1
            } if activation_type == QuantType.QUInt8 else {
                'QuantizeLinear': 1
            }
        else:
            kwargs = {'DequantizeLinear': 2, 'QuantizeLinear': 1}
        check_op_type_count(self, model_i8_path, **kwargs)
        # check node input/output type if such node exists in the graph
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        qnode_io_qtypes.update(
            {'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
        qnode_io_qtypes.update(
            {'ConvInteger': [['i', 2, activation_proto_qtype]]})
        check_qtype_by_node_type(self, model_i8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self,
                                model_fp32_path,
                                model_i8_path,
                                data_reader.get_next(),
                                rtol=rtol,
                                atol=atol)
Ejemplo n.º 12
0
    def quantize_concat_test(self,
                             activation_type,
                             weight_type,
                             extra_options=None):
        """Quantize a multi-Conv->Concat model in both QOperator and QDQ formats
        and verify node counts, tensor types, and accuracy against the fp32 model.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # NOTE: default was a mutable dict ({}); use None sentinel to avoid
        # sharing one dict instance across calls.
        if extra_options is None:
            extra_options = {}
        np.random.seed(1)
        model_fp32_path = 'concat_fp32.onnx'
        self.construct_model(model_fp32_path)
        data_reader = InputFeedsNegOneZeroOne(1, {'input': [1, 3, 15, 15]})

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = 'u8' if (activation_type
                                       == QuantType.QUInt8) else 's8'
        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
        model_q8_path = 'concat_{}{}.onnx'.format(activation_type_str,
                                                  weight_type_str)
        model_q8_qdq_path = 'concat_{}{}_qdq.onnx'.format(
            activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_q8_path,
                        data_reader,
                        quant_format=QuantFormat.QOperator,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)

        qnode_counts = {
            'QLinearConv': 3,
            'QuantizeLinear': 1,
            'DequantizeLinear': 1,
            'QLinearConcat': 1
        }
        check_op_type_count(self, model_q8_path, **qnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        # QLinearConcat zero-point inputs (every third input starting at 1)
        # must also match the activation quantization type.
        qnode_io_qtypes.update({
            'QLinearConcat': [['i', 1, activation_proto_qtype],
                              ['i', 4, activation_proto_qtype],
                              ['i', 7, activation_proto_qtype]]
        })
        check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_q8_qdq_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        qdqnode_counts = {
            'Conv': 3,
            'QuantizeLinear': 5,
            'DequantizeLinear': 8,
            'Concat': 1
        }
        check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_q8_qdq_path,
                                data_reader.get_next())
Ejemplo n.º 13
0
    def quantize_argmax_test(self,
                             activation_type,
                             weight_type,
                             extra_options=None):
        """Quantize a Conv->ArgMax model in both QOperator and QDQ formats
        and verify node counts, tensor types, and accuracy against the fp32 model.

        Args:
            activation_type: QuantType for activations (QUInt8 or QInt8).
            weight_type: QuantType for weights (QUInt8 or QInt8).
            extra_options: optional dict forwarded to quantize_static.
        """
        # NOTE: default was a mutable dict ({}); use None sentinel to avoid
        # sharing one dict instance across calls.
        if extra_options is None:
            extra_options = {}
        np.random.seed(1)
        model_fp32_path = 'argmax_fp32.onnx'

        self.construct_model_argmax(model_fp32_path, [1, 256, 128, 128],
                                    [1, 32, 128])

        activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
        activation_type_str = 'u8' if (activation_type
                                       == QuantType.QUInt8) else 's8'
        weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
        model_uint8_path = 'argmax_{}{}.onnx'.format(activation_type_str,
                                                     weight_type_str)
        model_uint8_qdq_path = 'argmax_{}{}_qdq.onnx'.format(
            activation_type_str, weight_type_str)

        # Verify QOperator mode
        data_reader = self.input_feeds(1, {'input': [1, 256, 128, 128]})
        quantize_static(model_fp32_path,
                        model_uint8_path,
                        data_reader,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        # Make sure ArgMax was rewired to a quantized (xint8) input: after
        # quantization its input must no longer be the fp32 'conv_output'.
        check_op_nodes(
            self, model_uint8_path, lambda node: not (
                node.name == "argmax_node" and node.input[0] == 'conv_output'))
        qnode_counts = {'QuantizeLinear': 1, 'QLinearConv': 1, 'ArgMax': 1}
        check_op_type_count(self, model_uint8_path, **qnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_path,
                                data_reader.get_next())

        # Verify QDQ mode
        data_reader.rewind()
        quantize_static(model_fp32_path,
                        model_uint8_qdq_path,
                        data_reader,
                        quant_format=QuantFormat.QDQ,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
        qdqnode_counts = {
            'QuantizeLinear': 2,
            'DequantizeLinear': 3,
            'ArgMax': 1
        }
        check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
        qnode_io_qtypes = {
            'QuantizeLinear': [['i', 2, activation_proto_qtype],
                               ['o', 0, activation_proto_qtype]]
        }
        check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
        data_reader.rewind()
        check_model_correctness(self, model_fp32_path, model_uint8_qdq_path,
                                data_reader.get_next())