def _init_from_converter(self,
                         options: QuantizationDebugOptions,
                         converter: TFLiteConverter,
                         calibrated_model: Optional[bytes] = None,
                         float_model: Optional[bytes] = None) -> None:
  """Convert the model and apply options.

  Converts the calibrated model to a quantized model and initializes a
  quantized model interpreter with it. Also initializes a float model
  interpreter if a float model is provided.

  Args:
    options: a QuantizationDebugOptions object.
    converter: an initialized tf.lite.TFLiteConverter.
    calibrated_model: Calibrated model bytes.
    float_model: Float model bytes.
  """
  self.quant_model = convert.mlir_quantize(
      calibrated_model,
      disable_per_channel=converter._experimental_disable_per_channel,  # pylint: disable=protected-access
      fully_quantize=options.fully_quantize,
      enable_numeric_verify=True,
      denylisted_ops=options.denylisted_ops,
      denylisted_nodes=options.denylisted_nodes)
  self._quant_interpreter = _interpreter.Interpreter(
      model_content=self.quant_model)
  self._float_interpreter = None
  if float_model is not None:
    self._float_interpreter = _interpreter.Interpreter(
        model_content=float_model)
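
# A minimal usage sketch for the debugger path above. It assumes the public
# tf.lite.experimental.QuantizationDebugger entry point; `keras_model` and
# `representative_gen` are illustrative placeholders, not names from this file.
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_gen
debugger = tf.lite.experimental.QuantizationDebugger(
    converter=converter, debug_dataset=representative_gen)
debugger.run()  # Runs the instrumented model over debug_dataset.
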
def testCalibrateAndQuantizeBuiltinInt16(self):
  func, calibration_gen = self._getCalibrationQuantizeModel()

  # Convert float model.
  float_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
  float_tflite = float_converter.convert()
  self.assertTrue(float_tflite)

  converter = lite.TFLiteConverterV2.from_concrete_functions([func])
  # TODO(b/156309549): We should add INT16 to the builtin types.
  converter.target_spec.supported_ops = [lite.OpsSet.TFLITE_BUILTINS_INT8]
  converter.representative_dataset = calibration_gen
  converter._experimental_calibrate_only = True
  calibrated_tflite = converter.convert()
  quantized_tflite = mlir_quantize(
      calibrated_tflite, inference_type=_types_pb2.QUANTIZED_INT16)
  self.assertTrue(quantized_tflite)

  # The default input and output types should be float.
  interpreter = Interpreter(model_content=quantized_tflite)
  interpreter.allocate_tensors()
  input_details = interpreter.get_input_details()
  self.assertLen(input_details, 1)
  self.assertEqual(np.float32, input_details[0]['dtype'])
  output_details = interpreter.get_output_details()
  self.assertLen(output_details, 1)
  self.assertEqual(np.float32, output_details[0]['dtype'])

  # Ensure that the quantized tflite model is smaller than the float model.
  self.assertLess(len(quantized_tflite), len(float_tflite))
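
# For context, a representative dataset generator like the one returned by
# _getCalibrationQuantizeModel typically follows this pattern (sketch; the
# input shape (1, 5, 5, 3) and sample count are assumptions, not taken from
# the test):
import numpy as np

def calibration_gen():
  for _ in range(5):
    yield [np.random.uniform(-1, 1, size=(1, 5, 5, 3)).astype(np.float32)]
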
def _quantize_model(func, calibration_gen, quantized_io=False, debug=True):
  """Quantizes model, in debug or normal mode."""
  converter = _quantize_converter(func, calibration_gen, debug)
  if debug:
    calibrated = converter.convert()
    return convert.mlir_quantize(
        calibrated, enable_numeric_verify=True, fully_quantize=quantized_io)
  else:
    return converter.convert()
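
# Illustrative call sites for the helper above (assumes the `func` and
# `calibration_gen` pair produced by the surrounding test fixture):
debug_model = _quantize_model(func, calibration_gen, quantized_io=True)
release_model = _quantize_model(func, calibration_gen, debug=False)
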
def _get_quantized_model(self, is_debug: bool) -> bytes:
  if not self.converter:
    raise ValueError('No converter found; use this function with the '
                     'converter option in the constructor.')
  return convert.mlir_quantize(
      self.calibrated_model,
      disable_per_channel=self.converter._experimental_disable_per_channel,  # pylint: disable=protected-access
      fully_quantize=self._debug_options.fully_quantize,
      enable_numeric_verify=is_debug,
      blocklisted_ops=self._debug_options.denylisted_ops,
      blocklisted_nodes=self._debug_options.denylisted_nodes)
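
# Sketch of how a caller might wrap this helper to expose both model flavors;
# the method names mirror the debugger's public get_debug_quantized_model /
# get_nondebug_quantized_model pattern but are assumptions here.
def get_nondebug_quantized_model(self) -> bytes:
  # Plain quantized model, without NumericVerify ops.
  return self._get_quantized_model(is_debug=False)

def get_debug_quantized_model(self) -> bytes:
  # Quantized model instrumented with NumericVerify ops.
  return self._get_quantized_model(is_debug=True)
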
def _quantize_model(self,
                    calibrated_model: bytes,
                    disable_per_channel: bool,
                    fully_quantize: bool,
                    enable_numeric_verify: bool,
                    denylisted_ops: Optional[List[str]] = None,
                    denylisted_nodes: Optional[List[str]] = None) -> bytes:
  """Quantizes the calibrated model, optionally adding NumericVerify ops."""
  return convert.mlir_quantize(
      calibrated_model,
      disable_per_channel=disable_per_channel,
      fully_quantize=fully_quantize,
      enable_numeric_verify=enable_numeric_verify,
      blocklisted_ops=denylisted_ops,
      blocklisted_nodes=denylisted_nodes)
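
# Hypothetical call site for the wrapper above, assuming `calibrated_model`
# holds the bytes produced by a converter run with
# _experimental_calibrate_only=True; the denylisted op name is illustrative.
quant_model = self._quantize_model(
    calibrated_model,
    disable_per_channel=False,
    fully_quantize=True,
    enable_numeric_verify=True,
    denylisted_ops=['CONV_2D'],
    denylisted_nodes=None)
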
def _quantize_model(func, calibration_gen, debug=True):
  """Quantizes model, in debug or normal mode."""
  converter = lite.TFLiteConverterV2.from_concrete_functions([func])
  converter.target_spec.supported_ops = [lite.OpsSet.TFLITE_BUILTINS_INT8]
  converter.representative_dataset = calibration_gen

  # Create a TFLite model with new quantizer and numeric verify ops.
  converter.optimizations = [lite.Optimize.DEFAULT]
  converter.experimental_new_quantizer = True
  if debug:
    converter._experimental_calibrate_only = True
    calibrated = converter.convert()
    return convert.mlir_quantize(calibrated, enable_numeric_verify=True)
  else:
    return converter.convert()
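
# Quick comparison of the two modes (sketch; `func` and `calibration_gen`
# come from a fixture like the ones above):
debug_tflite = _quantize_model(func, calibration_gen, debug=True)
plain_tflite = _quantize_model(func, calibration_gen, debug=False)
# The debug model carries extra NumericVerify ops and float reference
# tensors, so it is typically larger than the plain quantized model.
print(len(debug_tflite), len(plain_tflite))
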