def _modify_model_output_type(model, inference_output_type=dtypes.float32):
  """Modify model output type."""

  if inference_output_type == dtypes.float32:
    return

  subgraph = model.subgraphs[0]
  tensors = subgraph.tensors
  operators = subgraph.operators

  # Find all dequantize operators
  dequant_opcode_idxs = get_dequantize_opcode_idx(model)
  if operators and not dequant_opcode_idxs:
    for output in subgraph.outputs:
      output_type = _convert_tflite_enum_type_to_tf_type(tensors[output].type)
      if output_type == dtypes.float32:
        raise ValueError("Model output is not dequantized.")
    # None of the outputs have float32, then they must be int16, int8, or bool
    return

  # Validate that the model output is dequantized
  output_dequant_ops = []
  for op in operators:
    # Find operators that dequantize model output
    if (op.opcodeIndex in dequant_opcode_idxs and
        op.outputs[0] in subgraph.outputs):
      # If found, validate that the operator's output type is float
      quant_tensor, float_tensor = tensors[op.inputs[0]], tensors[op.outputs[0]]
      float_type = _convert_tflite_enum_type_to_tf_type(float_tensor.type)
      if float_type != dtypes.float32:
        if float_type == inference_output_type:
          continue
        else:
          raise ValueError(
              "Initial model output type must be tf.float32. Expected type "
              "for tensor with name '{}' is tf.float32, instead type is "
              "{}".format(float_tensor.name, get_tf_type_name(float_type)))
      # If found, validate that the operator input is quantized and compatible
      # with the final model output type
      quant_type = _convert_tflite_enum_type_to_tf_type(quant_tensor.type)
      if quant_type not in _MAP_QUANT_TO_IO_TYPES:
        raise ValueError(
            "Initial model output is not dequantized. Expected type for "
            "tensor with name '{}' should be in {}, instead type is {}".format(
                quant_tensor.name,
                tuple(get_tf_type_name(t)
                      for t in _MAP_QUANT_TO_IO_TYPES.keys()),
                get_tf_type_name(quant_type)))
      else:
        inference_io_types = _MAP_QUANT_TO_IO_TYPES[quant_type]
        if inference_output_type not in inference_io_types:
          raise ValueError(
              "Unsupported `inference_output_type` value. Expected to be in "
              "{}, instead got {}.".format(
                  tuple(get_tf_type_name(t) for t in inference_io_types),
                  get_tf_type_name(inference_output_type)))
      output_dequant_ops.append(op)

  if len(subgraph.outputs) != len(output_dequant_ops):
    logging.warning(
        "For model outputs containing unsupported operations which cannot be "
        "quantized, the `inference_output_type` attribute will default to the "
        "original type.")

  # Modify model output type
  if inference_output_type == dtypes.uint8:
    # Find a quantize operator
    quant_opcode_idx = -1
    for idx, opcode in enumerate(model.operatorCodes):
      builtin_code = schema_util.get_builtin_code_from_operator_code(opcode)
      if builtin_code == schema_fb.BuiltinOperator.QUANTIZE:
        quant_opcode_idx = idx
        break
    # Create a quantize operator, if none exist
    if quant_opcode_idx == -1:
      quant_op = schema_fb.OperatorCodeT()
      quant_op.builtinCode = schema_fb.BuiltinOperator.QUANTIZE
      quant_op.deprecatedBuiltinCode = schema_fb.BuiltinOperator.QUANTIZE
      model.operatorCodes.append(quant_op)
      quant_opcode_idx = len(model.operatorCodes) - 1
    # Change dequant op (int8 to float) to quant op (int8 to uint8)
    for op in output_dequant_ops:
      op.opcodeIndex = quant_opcode_idx
      int8_quantization = tensors[op.inputs[0]].quantization
      uint8_quantization = schema_fb.QuantizationParametersT()
      uint8_quantization.scale = [int8_quantization.scale[0]]
      uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128]
      tensors[op.outputs[0]].quantization = uint8_quantization
      tensors[op.outputs[0]].type = schema_fb.TensorType.UINT8
  elif inference_output_type in _MAP_QUANT_TO_IO_TYPES:
    # Remove the outputs and the dequant operator
    remove_tensors_idxs = set()
    for op in output_dequant_ops:
      subgraph.outputs[subgraph.outputs == op.outputs[0]] = op.inputs[0]
      if model.signatureDefs:
        signature_def = model.signatureDefs[0]
        for i in range(len(signature_def.outputs)):
          if signature_def.outputs[i].tensorIndex == op.outputs[0]:
            signature_def.outputs[i].tensorIndex = op.inputs[0]
      remove_tensors_idxs.add(op.outputs[0])
      operators.remove(op)
    # Remove tensors marked for deletion.
    _remove_tensors_from_model(model, remove_tensors_idxs)
  else:
    raise ValueError(
        "Unsupported `inference_output_type` value {}.".format(
            get_tf_type_name(inference_output_type)))
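# Why the uint8 branch above keeps the int8 scale and only shifts the zero
# point by 128: with the affine mapping real_value = scale * (q - zero_point),
# relabelling an int8 value q as the uint8 value q + 128 represents the same
# real value provided the zero point is shifted by the same 128. The helper
# below is a minimal illustrative sketch of that derivation, not library code.
def _int8_to_uint8_quant_params_sketch(scale, zero_point_int8):
  """Illustrative only: derive uint8 (scale, zero_point) from int8 params."""
  # scale * ((q + 128) - (zero_point_int8 + 128)) == scale * (q - zero_point_int8)
  return scale, zero_point_int8 + 128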
def _modify_model_input_type_per_subgraph(model, subgraph_index,
                                          signature_index,
                                          inference_input_type):
  """Modify model input type per subgraph."""
  subgraph = model.subgraphs[subgraph_index]
  tensors = subgraph.tensors
  operators = subgraph.operators

  # Find all quantize operators
  quant_opcode_idxs = get_quantize_opcode_idx(model)
  if operators and not quant_opcode_idxs:
    for input_idx in subgraph.inputs:
      input_type = _convert_tflite_enum_type_to_tf_type(
          tensors[input_idx].type)
      if input_type == dtypes.float32:
        raise ValueError("Model input is not dequantized.")
    # None of the inputs have float32, then they must be int16, int8, or bool
    return

  # Validate that the model input is quantized
  input_quant_ops = []
  for op in operators:
    # Find operators that quantize model input
    if op.opcodeIndex in quant_opcode_idxs and op.inputs[0] in subgraph.inputs:
      float_tensor, quant_tensor = tensors[op.inputs[0]], tensors[op.outputs[0]]
      # If found, validate that the operator's input type is float
      float_type = _convert_tflite_enum_type_to_tf_type(float_tensor.type)
      if float_type != dtypes.float32:
        if float_type == inference_input_type:
          continue
        else:
          raise ValueError(
              "Initial model input type must be tf.float32. Expected type "
              "for tensor with name '{}' is tf.float32, instead type is "
              "{}".format(float_tensor.name, get_tf_type_name(float_type)))
      # If found, validate that the operator output is quantized and compatible
      # with the final model input type
      quant_type = _convert_tflite_enum_type_to_tf_type(quant_tensor.type)
      if quant_type not in _MAP_QUANT_TO_IO_TYPES:
        raise ValueError(
            "Initial model input is not quantized. Expected type for "
            "tensor with name '{}' should be in {}, instead type is {}".format(
                quant_tensor.name,
                tuple(get_tf_type_name(t)
                      for t in _MAP_QUANT_TO_IO_TYPES.keys()),
                get_tf_type_name(quant_type)))
      else:
        inference_io_types = _MAP_QUANT_TO_IO_TYPES[quant_type]
        if inference_input_type not in inference_io_types:
          raise ValueError(
              "Unsupported `inference_input_type` value. Expected to be in "
              "{}, instead got {}.".format(
                  tuple(get_tf_type_name(t) for t in inference_io_types),
                  get_tf_type_name(inference_input_type)))
      input_quant_ops.append(op)

  if len(subgraph.inputs) != len(input_quant_ops):
    logging.warning(
        "For model inputs containing unsupported operations which cannot be "
        "quantized, the `inference_input_type` attribute will default to the "
        "original type.")

  # Modify model input type
  if inference_input_type == dtypes.uint8:
    # Change quant op (float to int8) to quant op (uint8 to int8)
    for op in input_quant_ops:
      int8_quantization = tensors[op.outputs[0]].quantization
      uint8_quantization = schema_fb.QuantizationParametersT()
      uint8_quantization.scale = [int8_quantization.scale[0]]
      uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128]
      tensors[op.inputs[0]].quantization = uint8_quantization
      tensors[op.inputs[0]].type = schema_fb.TensorType.UINT8
  elif inference_input_type in _MAP_QUANT_TO_IO_TYPES:
    # Remove the inputs and the quant operator
    remove_tensors_idxs = set()
    for op in input_quant_ops:
      subgraph.inputs[subgraph.inputs == op.inputs[0]] = op.outputs[0]
      if signature_index >= 0:
        signature_def = model.signatureDefs[signature_index]
        for i in range(len(signature_def.inputs)):
          if signature_def.inputs[i].tensorIndex == op.inputs[0]:
            signature_def.inputs[i].tensorIndex = op.outputs[0]
      remove_tensors_idxs.add(op.inputs[0])
      operators.remove(op)
    # Remove tensors marked for deletion.
    _remove_tensors_from_model(model, remove_tensors_idxs)
  else:
    raise ValueError("Unsupported `inference_input_type` value {}.".format(
        get_tf_type_name(inference_input_type)))
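# A possible driver for the per-subgraph helper above, shown as a sketch. It
# assumes each SignatureDefT carries a `subgraphIndex` field (as in the TFLite
# schema) and falls back to subgraph 0 with signature_index = -1 when the model
# has no signatures; the actual entry point in the library may differ.
def _modify_model_input_type_all_subgraphs_sketch(model, inference_input_type):
  """Illustrative only: apply the per-subgraph rewrite across the model."""
  if inference_input_type == dtypes.float32:
    return
  if not model.signatureDefs:
    _modify_model_input_type_per_subgraph(model, 0, -1, inference_input_type)
    return
  for signature_index, signature_def in enumerate(model.signatureDefs):
    _modify_model_input_type_per_subgraph(model, signature_def.subgraphIndex,
                                          signature_index,
                                          inference_input_type)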
def _modify_model_input_type(model, inference_input_type=dtypes.float32):
  """Modify model input type."""

  if inference_input_type == dtypes.float32:
    return

  subgraph = model.subgraphs[0]
  tensors = subgraph.tensors
  operators = subgraph.operators

  # Find all quantize operators
  quant_opcode_idxs = []
  for idx, opcode in enumerate(model.operatorCodes):
    builtin_code = schema_util.get_builtin_code_from_operator_code(opcode)
    if builtin_code == schema_fb.BuiltinOperator.QUANTIZE:
      quant_opcode_idxs.append(idx)
  if operators and not quant_opcode_idxs:
    raise ValueError("Model input is not quantized.")

  # Validate that the model input is quantized
  input_quant_ops = []
  for op in operators:
    # Find operators that quantize model input
    if op.opcodeIndex in quant_opcode_idxs and op.inputs[0] in subgraph.inputs:
      float_tensor, quant_tensor = tensors[op.inputs[0]], tensors[op.outputs[0]]
      # If found, validate that the operator's input type is float
      float_type = _convert_tflite_enum_type_to_tf_type(float_tensor.type)
      if float_type != dtypes.float32:
        raise ValueError(
            "Initial model input type must be tf.float32. Expected type for "
            "tensor with name '{}' is tf.float32, instead type is {}".format(
                float_tensor.name, _get_tf_type_name(float_type)))
      # If found, validate that the operator output is quantized and compatible
      # with the final model input type
      quant_type = _convert_tflite_enum_type_to_tf_type(quant_tensor.type)
      if quant_type not in _MAP_QUANT_TO_IO_TYPES:
        raise ValueError(
            "Initial model input is not quantized. Expected type for "
            "tensor with name '{}' should be in {}, instead type is {}".format(
                quant_tensor.name,
                tuple(_get_tf_type_name(t)
                      for t in _MAP_QUANT_TO_IO_TYPES.keys()),
                _get_tf_type_name(quant_type)))
      else:
        inference_io_types = _MAP_QUANT_TO_IO_TYPES[quant_type]
        if inference_input_type not in inference_io_types:
          raise ValueError(
              "Unsupported `inference_input_type` value. Expected to be in "
              "{}, instead got {}.".format(
                  tuple(_get_tf_type_name(t) for t in inference_io_types),
                  _get_tf_type_name(inference_input_type)))
      input_quant_ops.append(op)

  if len(subgraph.inputs) != len(input_quant_ops):
    logging.warning(
        "For model inputs containing unsupported operations which cannot be "
        "quantized, the `inference_input_type` attribute will default to the "
        "original type.")

  # Modify model input type
  if inference_input_type == dtypes.uint8:
    # Change quant op (float to int8) to quant op (uint8 to int8)
    for op in input_quant_ops:
      int8_quantization = tensors[op.outputs[0]].quantization
      uint8_quantization = schema_fb.QuantizationParametersT()
      uint8_quantization.scale = [int8_quantization.scale[0]]
      uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128]
      tensors[op.inputs[0]].quantization = uint8_quantization
      tensors[op.inputs[0]].type = schema_fb.TensorType.UINT8
  elif inference_input_type in _MAP_QUANT_TO_IO_TYPES:
    # Remove the inputs and the quant operator
    remove_tensors_idxs = set()
    for op in input_quant_ops:
      subgraph.inputs[subgraph.inputs == op.inputs[0]] = op.outputs[0]
      remove_tensors_idxs.add(op.inputs[0])
      operators.remove(op)
    # Remove tensors marked for deletion.
    _remove_tensors_from_model(model, remove_tensors_idxs)
  else:
    raise ValueError("Unsupported `inference_input_type` value {}.".format(
        _get_tf_type_name(inference_input_type)))
def modify_integer_quantized_model_io_type(
    model, inference_input_type=_lite_constants.FLOAT,
    inference_output_type=_lite_constants.FLOAT):
  """Modify the float input/output type of an integer quantized model.

  Args:
    model: An int8 quantized tflite model with float input and output.
    inference_input_type: tf.DType representing final input type.
      (default tf.float32)
    inference_output_type: tf.DType representing final output type.
      (default tf.float32)

  Returns:
    An int8 quantized tflite model with modified input and/or output type.

  Raises:
    ValueError: If the model is not int8 quantized or the
      inference_input_type and/or inference_output_type is unsupported.
    RuntimeError: If the modification was unsuccessful.
  """
  # Return if input and output types default to float
  if inference_input_type == _lite_constants.FLOAT and \
      inference_output_type == _lite_constants.FLOAT:
    return model

  # Validate input and output types
  if inference_input_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES:
    raise ValueError("The `inference_input_type` should be in {}".format(
        tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES)))
  if inference_output_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES:
    raise ValueError("The `inference_output_type` should be in {}".format(
        tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES)))

  logging.debug(
      ("Attempting to modify the model input from tf.float32 to %s "
       "and output from tf.float32 to %s"),
      _get_dtype_name(inference_input_type),
      _get_dtype_name(inference_output_type))

  # Convert the model to an object
  model = _convert_model_from_bytearray_to_object(model)

  # Validate the integer quantized model
  input_quant_ops, output_dequant_ops = \
      _validate_and_find_int8_quantized_inputs_outputs(model)

  # Initialize references and variables
  if len(model.subgraphs) > 1:
    raise ValueError("Model must only have one subgraph. Instead, it has "
                     "{} subgraphs.".format(len(model.subgraphs)))
  subgraph = model.subgraphs[0]
  tensors = subgraph.tensors
  operators = subgraph.operators
  remove_tensors_idxs = set()

  # Modify model input type
  if inference_input_type == _lite_constants.QUANTIZED_UINT8:
    # Change quant op (float to int8) to quant op (uint8 to int8)
    for op in input_quant_ops:
      int8_quantization = tensors[op.outputs[0]].quantization
      uint8_quantization = schema_fb.QuantizationParametersT()
      uint8_quantization.scale = [int8_quantization.scale[0]]
      uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128]
      tensors[op.inputs[0]].quantization = uint8_quantization
      tensors[op.inputs[0]].type = schema_fb.TensorType.UINT8
  elif inference_input_type == _lite_constants.INT8:
    # Remove the inputs and the quant operator
    for op in input_quant_ops:
      subgraph.inputs[subgraph.inputs == op.inputs[0]] = op.outputs[0]
      remove_tensors_idxs.add(op.inputs[0])
      operators.remove(op)

  # Modify model output type
  if inference_output_type == _lite_constants.QUANTIZED_UINT8:
    # Change dequant op (int8 to float) to quant op (int8 to uint8)
    for op in output_dequant_ops:
      op.opcodeIndex = input_quant_ops[0].opcodeIndex
      int8_quantization = tensors[op.inputs[0]].quantization
      uint8_quantization = schema_fb.QuantizationParametersT()
      uint8_quantization.scale = [int8_quantization.scale[0]]
      uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128]
      tensors[op.outputs[0]].quantization = uint8_quantization
      tensors[op.outputs[0]].type = schema_fb.TensorType.UINT8
  elif inference_output_type == _lite_constants.INT8:
    # Remove the outputs and the dequant operator
    for op in output_dequant_ops:
      subgraph.outputs[subgraph.outputs == op.outputs[0]] = op.inputs[0]
      remove_tensors_idxs.add(op.outputs[0])
      operators.remove(op)

  # Remove tensors marked for deletion.
  _remove_tensors_from_model(model, remove_tensors_idxs)

  # Convert the model to a bytearray
  model = _convert_model_from_object_to_bytearray(model)

  return model
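# A minimal usage sketch for modify_integer_quantized_model_io_type above. It
# assumes the caller already holds the serialized bytes of an int8-quantized
# TFLite model with float32 input/output; reading and writing the model file is
# left to the caller. The wrapper name is illustrative, not part of this module.
def _example_convert_io_to_uint8(tflite_model_bytes):
  """Illustrative only: rewrite float32 model I/O to uint8 I/O."""
  # QUANTIZED_UINT8 corresponds to tf.uint8; the returned value is again a
  # serialized flatbuffer, ready to be written to disk.
  return modify_integer_quantized_model_io_type(
      tflite_model_bytes,
      inference_input_type=_lite_constants.QUANTIZED_UINT8,
      inference_output_type=_lite_constants.QUANTIZED_UINT8)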
def _modify_model_output_type(model, inference_output_type=dtypes.float32):
  """Modify model output type."""

  if inference_output_type == dtypes.float32:
    return

  if inference_output_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES:
    raise ValueError(
        "Unsupported `inference_output_type` value. Expected to be in {}, "
        "instead got {}.".format(
            tuple(_get_tf_type_name(t)
                  for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES),
            _get_tf_type_name(inference_output_type)))

  subgraph = model.subgraphs[0]
  tensors = subgraph.tensors
  operators = subgraph.operators

  # Find all dequantize operators
  dequant_opcode_idxs = []
  for idx, opcode in enumerate(model.operatorCodes):
    if opcode.builtinCode == schema_fb.BuiltinOperator.DEQUANTIZE:
      dequant_opcode_idxs.append(idx)
  if not dequant_opcode_idxs:
    raise ValueError("Model output is not dequantized.")

  # Ensure that the model output is dequantized
  output_dequant_ops = []
  for op in operators:
    # Check if the operator dequantizes an output
    if op.opcodeIndex in dequant_opcode_idxs and \
        op.outputs[0] in subgraph.outputs:
      # If found, validate the operator input/output tensor types
      int_tensor, float_tensor = tensors[op.inputs[0]], tensors[op.outputs[0]]
      if float_tensor.type != schema_fb.TensorType.FLOAT32:
        raise ValueError(
            "Model output type must be tf.float32. Expected type for tensor "
            "with name '{}' is tf.float32, instead type is {}".format(
                float_tensor.name,
                _get_tf_type_name(
                    _convert_tflite_enum_type_to_tf_type(float_tensor.type))))
      if int_tensor.type != schema_fb.TensorType.INT8:
        raise ValueError(
            "Model output is not dequantized. Expected type for tensor "
            "with name '{}' is tf.int8, instead type is {}".format(
                int_tensor.name,
                _get_tf_type_name(
                    _convert_tflite_enum_type_to_tf_type(int_tensor.type))))
      output_dequant_ops.append(op)

  if len(subgraph.outputs) != len(output_dequant_ops):
    raise ValueError("Model output is not dequantized.")

  # Modify model output type
  if inference_output_type == dtypes.uint8:
    # Find a quantize operator
    quant_opcode_idx = -1
    for idx, opcode in enumerate(model.operatorCodes):
      if opcode.builtinCode == schema_fb.BuiltinOperator.QUANTIZE:
        quant_opcode_idx = idx
        break
    # Create a quantize operator, if none exist
    if quant_opcode_idx == -1:
      quant_op = schema_fb.OperatorCodeT()
      quant_op.builtinCode = schema_fb.BuiltinOperator.QUANTIZE
      quant_op.deprecatedBuiltinCode = schema_fb.BuiltinOperator.QUANTIZE
      model.operatorCodes.append(quant_op)
      quant_opcode_idx = len(model.operatorCodes) - 1
    # Change dequant op (int8 to float) to quant op (int8 to uint8)
    for op in output_dequant_ops:
      op.opcodeIndex = quant_opcode_idx
      int8_quantization = tensors[op.inputs[0]].quantization
      uint8_quantization = schema_fb.QuantizationParametersT()
      uint8_quantization.scale = [int8_quantization.scale[0]]
      uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128]
      tensors[op.outputs[0]].quantization = uint8_quantization
      tensors[op.outputs[0]].type = schema_fb.TensorType.UINT8
  elif inference_output_type == dtypes.int8:
    # Remove the outputs and the dequant operator
    remove_tensors_idxs = set()
    for op in output_dequant_ops:
      subgraph.outputs[subgraph.outputs == op.outputs[0]] = op.inputs[0]
      remove_tensors_idxs.add(op.outputs[0])
      operators.remove(op)
    # Remove tensors marked for deletion.
    _remove_tensors_from_model(model, remove_tensors_idxs)
  else:
    raise ValueError(
        "Unsupported `inference_output_type` value. Expected to be in {}, "
        "instead got {}.".format(
            tuple(_get_tf_type_name(t)
                  for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES),
            _get_tf_type_name(inference_output_type)))