def quantize_softmax_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a Conv+Softmax model in QOperator and QDQ formats and verify
    node counts, quantized tensor types, and numerical correctness.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = "softmax_fp32.onnx"
    self.construct_model_conv_softmax(
        model_fp32_path,
        [1, 2, 26, 42],
        [3, 2, 3, 3],
        [1, 3, 24, 40],
        {"axis": -2},
        [1, 3, 24, 40],
    )
    data_reader = self.input_feeds(1, {"input": [1, 2, 26, 42]})

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = "u8" if (activation_type == QuantType.QUInt8) else "s8"
    weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
    model_q8_path = f"softmax_{activation_type_str}{weight_type_str}.onnx"
    model_q8_qdq_path = f"softmax_qdq_{activation_type_str}{weight_type_str}.onnx"

    # Verify QOperator mode
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_q8_path,
        data_reader,
        quant_format=QuantFormat.QOperator,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    qnode_counts = {
        "QLinearConv": 1,
        "QuantizeLinear": 1,
        "DequantizeLinear": 2,
        "QLinearSoftmax": 1,
        "Softmax": 0,
    }
    check_op_type_count(self, model_q8_path, **qnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    qnode_io_qtypes.update(
        {
            "QLinearConv": [
                ["i", 2, activation_proto_qtype],
                ["i", 7, activation_proto_qtype],
                ["o", 0, activation_proto_qtype],
            ]
        }
    )
    qnode_io_qtypes.update({"QLinearSoftmax": [["i", 4, activation_proto_qtype]]})
    # NOTE: shape inference is not working on custom ops, so only the node
    # input/output types are checked here.
    check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_q8_qdq_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    qdqnode_counts = {
        "Conv": 1,
        "QuantizeLinear": 3,
        "DequantizeLinear": 4,
        "Softmax": 1,
    }
    check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
def quantize_reshape_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a MatMul+Reshape model in QOperator and QDQ formats and verify
    node counts, quantized tensor types, and numerical correctness.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = "reshape_fp32.onnx"
    self.construct_model_matmul_reshape(model_fp32_path, [3, 7], [7, 3], [1, 9])

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = "u8" if (activation_type == QuantType.QUInt8) else "s8"
    weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
    model_uint8_path = "reshape_{}{}.onnx".format(activation_type_str, weight_type_str)
    model_uint8_qdq_path = "reshape_{}{}_qdq.onnx".format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader = self.input_feeds(1, {"input": [3, 7]})
    quantize_static(
        model_fp32_path,
        model_uint8_path,
        data_reader,
        quant_format=QuantFormat.QOperator,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    # make sure reshape becomes an xint8 operator; its input name tells that
    check_op_nodes(
        self,
        model_uint8_path,
        lambda node: (node.name != "reshape_node" or node.input[0] != "matmul_output"),
    )
    qnode_counts = {
        "QLinearMatMul": 1,
        "QuantizeLinear": 1,
        "DequantizeLinear": 1,
        "Reshape": 1,
    }
    check_op_type_count(self, model_uint8_path, **qnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    qnode_io_qtypes.update({"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
    check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_uint8_qdq_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    qdqnode_counts = {
        "MatMul": 1,
        "QuantizeLinear": 3,
        "DequantizeLinear": 4,
        "Reshape": 1,
    }
    check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
def quantize_avgpool_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a Conv+AveragePool model in QOperator and QDQ formats and
    verify node counts, quantized tensor types, and numerical correctness.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = 'avgpool_fp32.onnx'
    self.construct_model_conv_avgpool(model_fp32_path,
                                      [1, 2, 26, 42], [3, 2, 3, 3],
                                      [1, 3, 24, 40], {'kernel_shape': [3, 3]},
                                      [1, 3, 22, 38])
    data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
    weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
    model_q8_path = 'avgpool_{}{}.onnx'.format(activation_type_str, weight_type_str)
    model_q8_qdq_path = 'avgpool_qdq_{}{}.onnx'.format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader.rewind()
    quantize_static(model_fp32_path,
                    model_q8_path,
                    data_reader,
                    quant_format=QuantFormat.QOperator,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    qnode_counts = {
        'QLinearConv': 1,
        'QuantizeLinear': 1,
        'DequantizeLinear': 2,
        'QLinearAveragePool': 1
    }
    check_op_type_count(self, model_q8_path, **qnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    qnode_io_qtypes.update({
        'QLinearConv': [['i', 2, activation_proto_qtype], ['i', 7, activation_proto_qtype],
                        ['o', 0, activation_proto_qtype]]
    })
    qnode_io_qtypes.update({'QLinearAveragePool': [['i', 4, activation_proto_qtype]]})
    # NOTE: shape inference is not working on custom ops, so only the node
    # input/output types are checked here.
    check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(model_fp32_path,
                    model_q8_qdq_path,
                    data_reader,
                    quant_format=QuantFormat.QDQ,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    qdqnode_counts = {
        'Conv': 1,
        'QuantizeLinear': 3,
        'DequantizeLinear': 4,
        'AveragePool': 1
    }
    check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
def quantize_resize_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a Conv+Resize model in QOperator and QDQ formats and verify
    node counts, quantized tensor types, and numerical correctness.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = "resize_fp32.onnx"
    kwargs = {
        "coordinate_transformation_mode": "asymmetric",
        "mode": "nearest",
        "nearest_mode": "floor",
    }
    self.construct_model_conv_resize(
        model_fp32_path,
        [1, 2, 26, 42],
        [3, 2, 3, 3],
        [1, 3, 24, 40],
        [1, 3, 48, 80],
        kwargs,
        [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
        [1.0, 1.0, 2.0, 2.0],
        None,
    )

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = "u8" if (activation_type == QuantType.QUInt8) else "s8"
    weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
    model_uint8_path = "resize_{}{}.onnx".format(activation_type_str, weight_type_str)
    model_uint8_qdq_path = "resize_{}{}_qdq.onnx".format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader = self.input_feeds(1, {"input": [1, 2, 26, 42]})
    quantize_static(
        model_fp32_path,
        model_uint8_path,
        data_reader,
        quant_format=QuantFormat.QOperator,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    # make sure resize becomes an xint8 operator; its input name tells that
    check_op_nodes(
        self,
        model_uint8_path,
        lambda node: (node.name != "resize_node" or node.input[0] != "conv_output"),
    )
    qnode_counts = {
        "QLinearConv": 1,
        "QuantizeLinear": 1,
        "DequantizeLinear": 2,
        "Resize": 1,
    }
    check_op_type_count(self, model_uint8_path, **qnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    qnode_io_qtypes.update({"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
    check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_uint8_qdq_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    qdqnode_counts = {
        "Conv": 1,
        "QuantizeLinear": 3,
        "DequantizeLinear": 4,
        "Resize": 1,
    }
    check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
def quantize_maxpool_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a Conv+MaxPool model in QOperator and QDQ formats and verify
    node counts, quantized tensor types, and numerical correctness.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = 'maxpool_fp32.onnx'
    self.construct_model_conv_maxpool(model_fp32_path,
                                      [1, 2, 26, 42], [3, 2, 3, 3],
                                      [1, 3, 24, 40], {'kernel_shape': [3, 3]},
                                      [1, 3, 22, 38])
    data_reader = self.input_feeds(1, {'input': [1, 2, 26, 42]})

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
    weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
    model_q8_path = 'maxpool_{}{}.onnx'.format(activation_type_str, weight_type_str)
    # fixed filename typo: was 'maxpool_dqd_{}{}.onnx'
    model_q8_qdq_path = 'maxpool_qdq_{}{}.onnx'.format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader.rewind()
    # quant_format made explicit for consistency with the other tests and to
    # stay correct if the library's default format ever changes.
    quantize_static(model_fp32_path,
                    model_q8_path,
                    data_reader,
                    quant_format=QuantFormat.QOperator,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    # make sure maxpool becomes an xint8 operator; its input name tells that
    check_op_nodes(
        self, model_q8_path,
        lambda node: (node.name != "maxpool_node" or node.input[0] != 'conv_output'))
    qnode_counts = {
        'QLinearConv': 1,
        'QuantizeLinear': 1,
        'DequantizeLinear': 2,
        'MaxPool': 1
    }
    check_op_type_count(self, model_q8_path, **qnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
    check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(model_fp32_path,
                    model_q8_qdq_path,
                    data_reader,
                    quant_format=QuantFormat.QDQ,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    qdqnode_counts = {
        'Conv': 1,
        'QuantizeLinear': 3,
        'DequantizeLinear': 4,
        'MaxPool': 1
    }
    check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
    check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
def run_quantize_squeezes_of_opset(
    self,
    opset=13,
    activation_type=QuantType.QUInt8,
    weight_type=QuantType.QUInt8,
    extra_options=None,
):
    """Quantize a Conv+Squeeze model built for the given opset in QOperator
    and QDQ formats and verify node counts, tensor types, and correctness.

    Args:
        opset: ONNX opset version used to build the fp32 model.
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = "squeezes_opset{}_fp32.onnx".format(opset)
    self.construct_model_conv_squeezes(model_fp32_path, [1, 2, 26, 42], [3, 2, 3, 3], [1, 3, 24, 40], opset=opset)

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = "u8" if (activation_type == QuantType.QUInt8) else "s8"
    weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
    model_uint8_path = "squeezes_opset{}_{}{}.onnx".format(opset, activation_type_str, weight_type_str)
    model_uint8_qdq_path = "squeezes_opset{}_{}{}_qdq.onnx".format(opset, activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader = self.input_feeds(1, {"input": [1, 2, 26, 42]})
    quantize_static(
        model_fp32_path,
        model_uint8_path,
        data_reader,
        quant_format=QuantFormat.QOperator,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    # make sure squeezes become xint8 operators; their input names tell that
    qnode_counts = {"QuantizeLinear": 1, "DequantizeLinear": 1}
    check_op_type_count(self, model_uint8_path, **qnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    qnode_io_qtypes.update({"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
    check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
    data_reader.rewind()
    # Looser tolerances here: the squeeze chain accumulates quantization error.
    check_model_correctness(
        self,
        model_fp32_path,
        model_uint8_path,
        data_reader.get_next(),
        rtol=0.01,
        atol=0.5,
    )

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_uint8_qdq_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    qdqnode_counts = {
        "Conv": 3,
        "QuantizeLinear": 9,
        "DequantizeLinear": 12,
    }
    check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(
        self,
        model_fp32_path,
        model_uint8_qdq_path,
        data_reader.get_next(),
        rtol=0.01,
        atol=0.5,
    )
def quantize_reshape_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a MatMul+Reshape model in QOperator and QDQ formats and verify
    node counts, quantized tensor types, and numerical correctness.

    NOTE(review): this is a duplicate definition of quantize_reshape_test and
    shadows the earlier one in this file — consider removing one of them.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = 'reshape_fp32.onnx'
    self.construct_model_matmul_reshape(model_fp32_path, [3, 7], [7, 3], [1, 9])

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
    weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
    model_uint8_path = 'reshape_{}{}.onnx'.format(activation_type_str, weight_type_str)
    model_uint8_qdq_path = 'reshape_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader = self.input_feeds(1, {'input': [3, 7]})
    # quant_format made explicit for consistency with the other tests and to
    # stay correct if the library's default format ever changes.
    quantize_static(model_fp32_path,
                    model_uint8_path,
                    data_reader,
                    quant_format=QuantFormat.QOperator,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    # make sure reshape becomes an xint8 operator; its input name tells that
    check_op_nodes(
        self, model_uint8_path,
        lambda node: (node.name != "reshape_node" or node.input[0] != 'matmul_output'))
    qnode_counts = {
        'QLinearMatMul': 1,
        'QuantizeLinear': 1,
        'DequantizeLinear': 1,
        'Reshape': 1
    }
    check_op_type_count(self, model_uint8_path, **qnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
    check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(model_fp32_path,
                    model_uint8_qdq_path,
                    data_reader,
                    quant_format=QuantFormat.QDQ,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    qdqnode_counts = {
        'MatMul': 1,
        'QuantizeLinear': 3,
        'DequantizeLinear': 4,
        'Reshape': 1
    }
    check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())
def quantize_concat_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a multi-Conv+Concat model in QOperator and QDQ formats and
    verify node counts, quantized tensor types, and numerical correctness.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = "concat_fp32.onnx"
    self.construct_model(model_fp32_path)
    data_reader = InputFeedsNegOneZeroOne(1, {"input": [1, 3, 15, 15]})

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = "u8" if (activation_type == QuantType.QUInt8) else "s8"
    weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
    model_q8_path = "concat_{}{}.onnx".format(activation_type_str, weight_type_str)
    model_q8_qdq_path = "concat_{}{}_qdq.onnx".format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_q8_path,
        data_reader,
        quant_format=QuantFormat.QOperator,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    qnode_counts = {
        "QLinearConv": 3,
        "QuantizeLinear": 1,
        "DequantizeLinear": 1,
        "QLinearConcat": 1,
    }
    check_op_type_count(self, model_q8_path, **qnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    qnode_io_qtypes.update(
        {
            "QLinearConcat": [
                ["i", 1, activation_proto_qtype],
                ["i", 4, activation_proto_qtype],
                ["i", 7, activation_proto_qtype],
            ]
        }
    )
    check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_q8_qdq_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    qdqnode_counts = {
        "Conv": 3,
        "QuantizeLinear": 5,
        "DequantizeLinear": 8,
        "Concat": 1,
    }
    check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
def verify_quantize_with_pad_mode(
    self,
    pad_mode,
    constant_value=None,
    quantize_mode="static",
    rtol=0.01,
    atol=0.05,
    activation_type=QuantType.QUInt8,
    weight_type=QuantType.QUInt8,
    extra_options=None,
):
    """Quantize a Conv+Pad model with the given pad mode (statically or
    dynamically) and verify node counts, tensor types, and correctness.

    Args:
        pad_mode: Pad attribute value, or None to omit the attribute.
        constant_value: optional constant fill value for constant-mode Pad.
        quantize_mode: "static" uses the calibration data reader; anything
            else quantizes dynamically (no data reader).
        rtol / atol: tolerances for the correctness comparison.
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (the quantizer may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(108)
    tag_pad_mode = pad_mode if pad_mode is not None else "none"
    tag_constant_value = "" if constant_value is None else "_value"
    model_fp32_path = "qop_pad_{}_fp32_{}{}.onnx".format(quantize_mode, tag_pad_mode, tag_constant_value)
    data_reader = self.input_feeds(1, {"input": [1, 8, 33, 33]})
    self.construct_model_conv_pad(
        model_fp32_path,
        [1, 8, 33, 33],
        [16, 8, 3, 3],
        [1, 16, 31, 31],
        pad_mode,
        [0, 0, 1, 2, 0, 0, 3, 4],
        constant_value=constant_value,
    )

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = "u8" if (activation_type == QuantType.QUInt8) else "s8"
    weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
    model_i8_path = "qop_pad_{}_i8_{}{}_{}{}.onnx".format(
        quantize_mode,
        tag_pad_mode,
        tag_constant_value,
        activation_type_str,
        weight_type_str,
    )
    data_reader.rewind()
    self.quantize_model(
        model_fp32_path,
        model_i8_path,
        None if quantize_mode != "static" else data_reader,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    # DequantizeLinear=2 means there is one DequantizeLinear node after both
    # conv and pad, i.e. the pad node runs in quantized semantics.
    # In dynamic quantize mode the pad operator is in fact not quantized,
    # since its input is fp32.
    if quantize_mode != "static":
        kwargs = {"DynamicQuantizeLinear": 1} if activation_type == QuantType.QUInt8 else {"QuantizeLinear": 1}
    else:
        kwargs = {"DequantizeLinear": 2, "QuantizeLinear": 1}
    check_op_type_count(self, model_i8_path, **kwargs)
    # check node input/output type if such node exists in the graph
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    qnode_io_qtypes.update({"DequantizeLinear": [["i", 2, activation_proto_qtype]]})
    qnode_io_qtypes.update({"ConvInteger": [["i", 2, activation_proto_qtype]]})
    check_qtype_by_node_type(self, model_i8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(
        self,
        model_fp32_path,
        model_i8_path,
        data_reader.get_next(),
        rtol=rtol,
        atol=atol,
    )
def quantize_argmax_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a Conv+ArgMax model in QOperator and QDQ formats (including a
    TensorRT-style QDQ variant that quantizes only ArgMax) and verify node
    counts, quantized tensor types, and numerical correctness.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = "argmax_fp32.onnx"
    self.construct_model_argmax(model_fp32_path, [1, 256, 128, 128], [1, 32, 128])

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = "u8" if (activation_type == QuantType.QUInt8) else "s8"
    weight_type_str = "u8" if (weight_type == QuantType.QUInt8) else "s8"
    model_uint8_path = "argmax_{}{}.onnx".format(activation_type_str, weight_type_str)
    model_uint8_qdq_path = "argmax_{}{}_qdq.onnx".format(activation_type_str, weight_type_str)
    model_uint8_qdq_trt_path = "argmax_{}{}_qdq_trt.onnx".format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader = self.input_feeds(1, {"input": [1, 256, 128, 128]})
    quantize_static(
        model_fp32_path,
        model_uint8_path,
        data_reader,
        quant_format=QuantFormat.QOperator,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    # make sure argmax becomes an xint8 operator; its input name tells that
    check_op_nodes(
        self,
        model_uint8_path,
        lambda node: not (node.name == "argmax_node" and node.input[0] == "conv_output"),
    )
    qnode_counts = {"QuantizeLinear": 1, "QLinearConv": 1, "ArgMax": 1}
    check_op_type_count(self, model_uint8_path, **qnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_uint8_qdq_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
    )
    qdqnode_counts = {
        "QuantizeLinear": 2,
        "DequantizeLinear": 3,
        "ArgMax": 1,
    }
    check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())

    # Verify QDQ mode for TensorRT: only ArgMax is quantized.
    data_reader.rewind()
    quantize_static(
        model_fp32_path,
        model_uint8_qdq_trt_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        activation_type=activation_type,
        weight_type=weight_type,
        extra_options=extra_options,
        op_types_to_quantize=["ArgMax"],
    )
    qdqnode_counts = {
        "QuantizeLinear": 1,
        "DequantizeLinear": 1,
        "ArgMax": 1,
    }
    check_op_type_count(self, model_uint8_qdq_trt_path, **qdqnode_counts)
    qnode_io_qtypes = {
        "QuantizeLinear": [
            ["i", 2, activation_proto_qtype],
            ["o", 0, activation_proto_qtype],
        ]
    }
    check_qtype_by_node_type(self, model_uint8_qdq_trt_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_qdq_trt_path, data_reader.get_next())
def verify_quantize_with_pad_mode(self,
                                  pad_mode,
                                  constant_value=None,
                                  quantize_mode='static',
                                  rtol=0.01,
                                  atol=0.05,
                                  activation_type=QuantType.QUInt8,
                                  weight_type=QuantType.QUInt8,
                                  extra_options=None):
    """Quantize a Conv+Pad model with the given pad mode (statically or
    dynamically) and verify node counts, tensor types, and correctness.

    NOTE(review): this is a duplicate definition of
    verify_quantize_with_pad_mode and shadows the earlier one in this file —
    consider removing one of them.

    Args:
        pad_mode: Pad attribute value, or None to omit the attribute.
        constant_value: optional constant fill value for constant-mode Pad.
        quantize_mode: 'static' uses the calibration data reader; anything
            else quantizes dynamically (no data reader).
        rtol / atol: tolerances for the correctness comparison.
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (the quantizer may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(108)
    tag_pad_mode = pad_mode if pad_mode is not None else 'none'
    tag_constant_value = '' if constant_value is None else '_value'
    model_fp32_path = 'qop_pad_{}_fp32_{}{}.onnx'.format(quantize_mode, tag_pad_mode, tag_constant_value)
    data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]})
    self.construct_model_conv_pad(model_fp32_path,
                                  [1, 8, 33, 33], [16, 8, 3, 3],
                                  [1, 16, 31, 31],
                                  pad_mode, [0, 0, 1, 2, 0, 0, 3, 4],
                                  constant_value=constant_value)

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
    weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
    model_i8_path = 'qop_pad_{}_i8_{}{}_{}{}.onnx'.format(quantize_mode, tag_pad_mode, tag_constant_value,
                                                          activation_type_str, weight_type_str)
    data_reader.rewind()
    self.quantize_model(model_fp32_path,
                        model_i8_path,
                        None if quantize_mode != 'static' else data_reader,
                        activation_type=activation_type,
                        weight_type=weight_type,
                        extra_options=extra_options)
    # DequantizeLinear=2 means there is one DequantizeLinear node after both
    # conv and pad, i.e. the pad node runs in quantized semantics.
    # In dynamic quantize mode the pad operator is in fact not quantized,
    # since its input is fp32.
    if quantize_mode != 'static':
        kwargs = {'DynamicQuantizeLinear': 1} if activation_type == QuantType.QUInt8 else {'QuantizeLinear': 1}
    else:
        kwargs = {'DequantizeLinear': 2, 'QuantizeLinear': 1}
    check_op_type_count(self, model_i8_path, **kwargs)
    # check node input/output type if such node exists in the graph
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    qnode_io_qtypes.update({'DequantizeLinear': [['i', 2, activation_proto_qtype]]})
    qnode_io_qtypes.update({'ConvInteger': [['i', 2, activation_proto_qtype]]})
    check_qtype_by_node_type(self, model_i8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self,
                            model_fp32_path,
                            model_i8_path,
                            data_reader.get_next(),
                            rtol=rtol,
                            atol=atol)
def quantize_concat_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a multi-Conv+Concat model in QOperator and QDQ formats and
    verify node counts, quantized tensor types, and numerical correctness.

    NOTE(review): this is a duplicate definition of quantize_concat_test and
    shadows the earlier one in this file — consider removing one of them.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = 'concat_fp32.onnx'
    self.construct_model(model_fp32_path)
    data_reader = InputFeedsNegOneZeroOne(1, {'input': [1, 3, 15, 15]})

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
    weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
    model_q8_path = 'concat_{}{}.onnx'.format(activation_type_str, weight_type_str)
    model_q8_qdq_path = 'concat_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader.rewind()
    quantize_static(model_fp32_path,
                    model_q8_path,
                    data_reader,
                    quant_format=QuantFormat.QOperator,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    qnode_counts = {
        'QLinearConv': 3,
        'QuantizeLinear': 1,
        'DequantizeLinear': 1,
        'QLinearConcat': 1
    }
    check_op_type_count(self, model_q8_path, **qnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    qnode_io_qtypes.update({
        'QLinearConcat': [['i', 1, activation_proto_qtype], ['i', 4, activation_proto_qtype],
                          ['i', 7, activation_proto_qtype]]
    })
    check_qtype_by_node_type(self, model_q8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(model_fp32_path,
                    model_q8_qdq_path,
                    data_reader,
                    quant_format=QuantFormat.QDQ,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    qdqnode_counts = {
        'Conv': 3,
        'QuantizeLinear': 5,
        'DequantizeLinear': 8,
        'Concat': 1
    }
    check_op_type_count(self, model_q8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    check_qtype_by_node_type(self, model_q8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_q8_qdq_path, data_reader.get_next())
def quantize_argmax_test(self, activation_type, weight_type, extra_options=None):
    """Quantize a Conv+ArgMax model in QOperator and QDQ formats and verify
    node counts, quantized tensor types, and numerical correctness.

    NOTE(review): this is a duplicate definition of quantize_argmax_test and
    shadows the earlier one in this file — consider removing one of them.

    Args:
        activation_type: QuantType for activations (QUInt8 or QInt8).
        weight_type: QuantType for weights (QUInt8 or QInt8).
        extra_options: optional dict of extra quantizer options.
    """
    # Avoid a shared mutable default argument (quantize_static may mutate it).
    extra_options = {} if extra_options is None else extra_options
    np.random.seed(1)
    model_fp32_path = 'argmax_fp32.onnx'
    self.construct_model_argmax(model_fp32_path, [1, 256, 128, 128], [1, 32, 128])

    activation_proto_qtype = TensorProto.UINT8 if activation_type == QuantType.QUInt8 else TensorProto.INT8
    activation_type_str = 'u8' if (activation_type == QuantType.QUInt8) else 's8'
    weight_type_str = 'u8' if (weight_type == QuantType.QUInt8) else 's8'
    model_uint8_path = 'argmax_{}{}.onnx'.format(activation_type_str, weight_type_str)
    model_uint8_qdq_path = 'argmax_{}{}_qdq.onnx'.format(activation_type_str, weight_type_str)

    # Verify QOperator mode
    data_reader = self.input_feeds(1, {'input': [1, 256, 128, 128]})
    # quant_format made explicit for consistency with the other tests and to
    # stay correct if the library's default format ever changes.
    quantize_static(model_fp32_path,
                    model_uint8_path,
                    data_reader,
                    quant_format=QuantFormat.QOperator,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    # make sure argmax becomes an xint8 operator; its input name tells that
    check_op_nodes(
        self, model_uint8_path,
        lambda node: not (node.name == "argmax_node" and node.input[0] == 'conv_output'))
    qnode_counts = {'QuantizeLinear': 1, 'QLinearConv': 1, 'ArgMax': 1}
    check_op_type_count(self, model_uint8_path, **qnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    check_qtype_by_node_type(self, model_uint8_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_path, data_reader.get_next())

    # Verify QDQ mode
    data_reader.rewind()
    quantize_static(model_fp32_path,
                    model_uint8_qdq_path,
                    data_reader,
                    quant_format=QuantFormat.QDQ,
                    activation_type=activation_type,
                    weight_type=weight_type,
                    extra_options=extra_options)
    qdqnode_counts = {
        'QuantizeLinear': 2,
        'DequantizeLinear': 3,
        'ArgMax': 1
    }
    check_op_type_count(self, model_uint8_qdq_path, **qdqnode_counts)
    qnode_io_qtypes = {
        'QuantizeLinear': [['i', 2, activation_proto_qtype], ['o', 0, activation_proto_qtype]]
    }
    check_qtype_by_node_type(self, model_uint8_qdq_path, qnode_io_qtypes)
    data_reader.rewind()
    check_model_correctness(self, model_fp32_path, model_uint8_qdq_path, data_reader.get_next())