def quantize_torch_qat_export(
    model: Union[ModelProto, str],
    output_file_path: Union[str, None] = None,
    inplace: bool = True,
) -> ModelProto:
    """
    :param model: The model to convert, or a file path to it
    :param output_file_path: File path to save the converted model to
    :param inplace: If true, does conversion of model in place. Default is true
    :return: Converts a model exported from a torch QAT session from a QAT graph with
        fake quantize ops surrounding operations to a quantized graph with quantized
        operations. All quantized Convs and FC inputs and outputs be surrounded by
        fake quantize ops
    """
    if isinstance(model, str):
        model = onnx.load(model)

    if not inplace:
        model = deepcopy(model)

    _fold_qat_conv_bns(model)
    _fold_relu_quants(model)
    _convert_single_constants_to_initializers(model)
    _delete_repeated_qat_blocks(model)
    _convert_quantizable_ops(model)
    quantize_resnet_identity_add_inputs(model)
    quantized_residual_add_optim(model)
    _remove_duplicate_quantize__ops(model)
    ONNXGraph(model).sort_nodes_topologically()

    if output_file_path:
        onnx.save(model, output_file_path)

    return model
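
# A minimal usage sketch for quantize_torch_qat_export above; the file names
# here are hypothetical placeholders for an ONNX model exported from a torch
# QAT session, not files shipped with the library.
def _example_qat_export_conversion():
    converted = quantize_torch_qat_export(
        "model_qat.onnx",  # hypothetical path to a torch QAT export
        output_file_path="model_quantized.onnx",
    )
    print(f"quantized graph has {len(converted.graph.node)} nodes")
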
def _test_resnet_identity_quant(model_path, has_resnet_block, save_optimized):
    quant_model = onnx.load(model_path)
    if has_resnet_block:  # run ResNet optimization
        assert quantize_resnet_identity_add_inputs(quant_model)
    # check that running the optimization has no effect even if it has already been run
    assert not quantize_resnet_identity_add_inputs(quant_model)
    if save_optimized:
        onnx.save(quant_model, model_path)
def quantize_torch_qat_export(model: ModelProto, inplace: bool = True) -> ModelProto:
    """
    :param model: The model to convert
    :param inplace: If true, does conversion of model in place. Default is true
    :return: Converts a model exported from a torch QAT session from a QAT graph with
        fake quantize ops surrounding operations to a quantized graph with quantized
        operations. All quantized Convs and FC inputs and outputs be surrounded by
        fake quantize ops
    """
    if not inplace:
        model = deepcopy(model)

    _fold_qat_conv_bns(model)
    _fold_relu_quants(model)
    _convert_quantization_constants_to_initializers(model)
    _delete_repeated_qat_blocks(model)
    _convert_quantizable_ops(model)
    quantize_resnet_identity_add_inputs(model)
    _remove_duplicate_quantize__ops(model)

    return model
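
# A minimal usage sketch for the ModelProto-only variant above. Passing
# inplace=False converts a copy, leaving the loaded QAT graph untouched;
# the file paths are hypothetical placeholders.
def _example_qat_export_copy_conversion():
    qat_model = onnx.load("model_qat.onnx")  # hypothetical path
    quantized_model = quantize_torch_qat_export(qat_model, inplace=False)
    # qat_model still holds the original fake-quantize graph
    onnx.save(quantized_model, "model_quantized.onnx")
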
def quantize_model_post_training(
    onnx_file: str,
    data_loader: DataLoader,
    output_model_path: str = None,
    calibrate_op_types: Iterable[str] = ("Conv", "MatMul", "Gemm"),
    exclude_nodes: List[str] = None,
    include_nodes: List[str] = None,
    augmented_model_path: str = None,
    static: bool = True,
    symmetric_weight: bool = False,
    force_fusions: bool = False,
    show_progress: bool = True,
    run_extra_opt: bool = True,
) -> Union[None, onnx.ModelProto]:
    """
    Wrapper function for calibrating and quantizing an Onnx model

    :param onnx_file: File path to saved Onnx model to calibrate and quantize
    :param data_loader: Iterable of lists of model inputs or filepath to directory
        of numpy arrays. If the model has multiple inputs and an .npz file is
        provided, the function will try to extract each input from the .npz file
        by name.  If the names do not match, the function will try to extract the
        inputs in order.  Will raise an exception of the number of inputs does not
        match the number of arrays in the .npz file.
    :param output_model_path: Filepath to where the quantized model should be saved to.
        If not provided, then the quantized Onnx model object will be returned instead.
    :param calibrate_op_types: List of Onnx ops names to calibrate and quantize within
        the model. Currently Onnx only supports quantizing 'Conv' and 'MatMul' ops.
    :param exclude_nodes: List of operator names that should not be quantized
    :param include_nodes: List of operator names force to be quantized
    :param augmented_model_path: file path to save augmented model to for verification
    :param static: True to use static quantization. Default is static.
    :param symmetric_weight: True to use symmetric weight quantization.
        Default is False
    :param force_fusions: True to force fusions in quantization. Default is False
    :param show_progress: If true, will display a tqdm progress bar during calibration.
        Default is True
    :param run_extra_opt: If true, will run additional optimizations on the quantized
        model. Currently the only optimization is quantizing identity relu outputs in
        ResNet blocks
    :return: None or quantized onnx model object if output_model_path is not provided
    """
    calibrator = CalibrationSession(
        onnx_file,
        calibrate_op_types,
        exclude_nodes,
        include_nodes,
        augmented_model_path,
        static,
    )

    # data_loader must have a finite number of examples
    assert not data_loader.infinite

    data_iterator = tqdm(data_loader) if show_progress else data_loader

    for input_batch, _ in data_iterator:
        calibrator.process_batch(input_batch)

    quantization_params_dict = calibrator.get_quantization_params_dict()
    calibrated_quantized_model = quantize(
        calibrator.model,
        quantization_mode=QuantizationMode.QLinearOps,
        force_fusions=force_fusions,
        quantization_params=quantization_params_dict,
        nodes_to_exclude=exclude_nodes if exclude_nodes else None,
        symmetric_weight=symmetric_weight,
        static=static,
    )

    if run_extra_opt:
        quantize_resnet_identity_add_inputs(calibrated_quantized_model)

    if output_model_path is None:
        return calibrated_quantized_model
    else:
        onnx.save(calibrated_quantized_model, output_model_path)
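
# A minimal usage sketch for quantize_model_post_training above. The file
# paths are hypothetical placeholders, and data_loader is assumed to be a
# finite DataLoader yielding (input_batch, label) pairs as the calibration
# loop above expects.
def _example_post_training_quantization(data_loader: DataLoader):
    quantize_model_post_training(
        "model_fp32.onnx",  # hypothetical path to the float model
        data_loader,
        output_model_path="model_int8.onnx",
        symmetric_weight=True,
    )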