Example #1
    def _init_from_converter(self,
                             options: QuantizationDebugOptions,
                             converter: TFLiteConverter,
                             calibrated_model: Optional[bytes] = None,
                             float_model: Optional[bytes] = None) -> None:
        """Convert the model and apply options.

    Converts the quantized model and initializes a quantized model interpreter
    with the quantized model. Returns a float model interpreter if float model
    is provided.

    Args:
      options: a QuantizationDebugOptions object.
      converter: an initialized tf.lite.TFLiteConverter.
      calibrated_model: Calibrated model bytes.
      float_model: Float model bytes.
    """
        self.quant_model = convert.mlir_quantize(
            calibrated_model,
            disable_per_channel=converter._experimental_disable_per_channel,  # pylint: disable=protected-access
            fully_quantize=options.fully_quantize,
            enable_numeric_verify=True,
            denylisted_ops=options.denylisted_ops,
            denylisted_nodes=options.denylisted_nodes)
        self._quant_interpreter = _interpreter.Interpreter(
            model_content=self.quant_model)
        self._float_interpreter = None
        if float_model is not None:
            self._float_interpreter = _interpreter.Interpreter(
                model_content=float_model)
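
For reference, a minimal sketch of how this private initializer is typically reached through the public debugger API (assuming the tf.lite.experimental.QuantizationDebugger interface from recent TensorFlow releases; the SavedModel path, dataset shape, and denylisted op are placeholders, not taken from the original code):

import numpy as np
import tensorflow as tf

def representative_dataset():
    # Yields lists of input arrays matching the model signature (placeholder shape).
    for _ in range(16):
        yield [np.random.uniform(-1, 1, size=(1, 8)).astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_saved_model('/tmp/saved_model')  # placeholder path
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset

debug_options = tf.lite.experimental.QuantizationDebugOptions(
    denylisted_ops=['MEAN'])  # example: keep MEAN ops in float
debugger = tf.lite.experimental.QuantizationDebugger(
    converter=converter,
    debug_dataset=representative_dataset,
    debug_options=debug_options)
debugger.run()
with open('/tmp/layer_stats.csv', 'w') as f:
    debugger.layer_statistics_dump(f)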
Example #2
  def testCalibrateAndQuantizeBuiltinInt16(self):
    func, calibration_gen = self._getCalibrationQuantizeModel()

    # Convert float model.
    float_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
    float_tflite = float_converter.convert()
    self.assertTrue(float_tflite)

    converter = lite.TFLiteConverterV2.from_concrete_functions([func])
    # TODO(b/156309549): We should add INT16 to the builtin types.
    converter.target_spec.supported_ops = [
        lite.OpsSet.TFLITE_BUILTINS_INT8
    ]
    converter.representative_dataset = calibration_gen
    converter._experimental_calibrate_only = True
    calibrated_tflite = converter.convert()
    quantized_tflite = mlir_quantize(calibrated_tflite,
                                     inference_type=_types_pb2.QUANTIZED_INT16)

    self.assertTrue(quantized_tflite)

    # The default input and output types should be float.
    interpreter = Interpreter(model_content=quantized_tflite)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    self.assertLen(input_details, 1)
    self.assertEqual(np.float32, input_details[0]['dtype'])
    output_details = interpreter.get_output_details()
    self.assertLen(output_details, 1)
    self.assertEqual(np.float32, output_details[0]['dtype'])

    # Ensure that the quantized weights tflite model is smaller.
    self.assertLess(len(quantized_tflite), len(float_tflite))
Example #3
def _quantize_model(func, calibration_gen, quantized_io=False, debug=True):
    """Quantizes model, in debug or normal mode."""
    converter = _quantize_converter(func, calibration_gen, debug)
    if debug:
        calibrated = converter.convert()
        return convert.mlir_quantize(calibrated,
                                     enable_numeric_verify=True,
                                     fully_quantize=quantized_io)
    else:
        return converter.convert()
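
Example #3 depends on a _quantize_converter helper that is not shown here. A plausible reconstruction, based on the inline converter setup spelled out in Example #6 below (the helper itself is an assumption; the quantized_io flag from Example #3 is only forwarded to mlir_quantize and does not reach it):

def _quantize_converter(func, calibration_gen, debug=True):
    """Builds an int8 post-training quantization converter (assumed helper)."""
    converter = lite.TFLiteConverterV2.from_concrete_functions([func])
    converter.target_spec.supported_ops = [lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.representative_dataset = calibration_gen
    converter.optimizations = [lite.Optimize.DEFAULT]
    converter.experimental_new_quantizer = True
    if debug:
        # Calibrate only; the caller quantizes later via convert.mlir_quantize.
        converter._experimental_calibrate_only = True
    return converter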
Example #4
  def _get_quantized_model(self, is_debug: bool) -> bytes:
    if not self.converter:
      raise ValueError('No converter found, use this function with the '
                       'converter option in the constructor.')

    return convert.mlir_quantize(
        self.calibrated_model,
        disable_per_channel=self.converter._experimental_disable_per_channel,  # pylint: disable=protected-access
        fully_quantize=self._debug_options.fully_quantize,
        enable_numeric_verify=is_debug,
        blocklisted_ops=self._debug_options.denylisted_ops,
        blocklisted_nodes=self._debug_options.denylisted_nodes)
Example #5
  def _quantize_model(self,
                      calibrated_model: bytes,
                      disable_per_channel: bool,
                      fully_quantize: bool,
                      enable_numeric_verify: bool,
                      denylisted_ops: Optional[List[str]] = None,
                      denylisted_nodes: Optional[List[str]] = None) -> bytes:
    """Quantizes the calibrated model via convert.mlir_quantize."""
    return convert.mlir_quantize(
        calibrated_model,
        disable_per_channel=disable_per_channel,
        fully_quantize=fully_quantize,
        enable_numeric_verify=enable_numeric_verify,
        blocklisted_ops=denylisted_ops,
        blocklisted_nodes=denylisted_nodes)
Example #6
def _quantize_model(func, calibration_gen, debug=True):
    """Quantizes model, in debug or normal mode."""
    converter = lite.TFLiteConverterV2.from_concrete_functions([func])
    converter.target_spec.supported_ops = [lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.representative_dataset = calibration_gen

    # Create a TFLite model with new quantizer and numeric verify ops.
    converter.optimizations = [lite.Optimize.DEFAULT]
    converter.experimental_new_quantizer = True
    if debug:
        converter._experimental_calibrate_only = True
        calibrated = converter.convert()
        return convert.mlir_quantize(calibrated, enable_numeric_verify=True)
    else:
        return converter.convert()
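
For completeness, a sketch of how the func and calibration_gen arguments used throughout these examples might be produced; the toy model, input shape, and dataset size are placeholders, not taken from the original code:

import numpy as np
import tensorflow as tf

class ToyModel(tf.Module):

    @tf.function(input_signature=[tf.TensorSpec(shape=[1, 8], dtype=tf.float32)])
    def __call__(self, x):
        return tf.nn.relu(x + 1.0)

model = ToyModel()
func = model.__call__.get_concrete_function()

def calibration_gen():
    # Representative dataset: yields lists of inputs matching the signature.
    for _ in range(16):
        yield [np.random.uniform(-1, 1, size=(1, 8)).astype(np.float32)]

debug_model = _quantize_model(func, calibration_gen, debug=True)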