Example #1
def get_native_quant_patterns(
    additional_quant_patterns: Optional[Dict[Pattern, QuantizerCls]] = None
) -> Dict[Pattern, QuantizerCls]:
    """
    Return a map from pattern to quantize handlers based on the default patterns and the native backend_config_dict.
    The returned map is sorted such that longer patterns will be encountered first when iterating through it.
    """
    patterns = get_default_quant_patterns()
    if additional_quant_patterns is not None:
        patterns = get_combined_dict(patterns, additional_quant_patterns)
    # TODO: currently we just extend the quantize handlers generated from
    # `get_native_backend_config_dict`
    # in the future we can just assign backend_config_dict when everything is defined
    for pattern, quantize_handler in get_pattern_to_quantize_handlers(
            get_native_backend_config_dict()).items():
        patterns[pattern] = quantize_handler
    return sorted_patterns_dict(patterns)
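
A minimal usage sketch for the helper above. The import path is an assumption (in some PyTorch releases this helper lives in torch.ao.quantization.fx.quantization_patterns) and the printed output is illustrative only:

# Hedged sketch: the import path is an assumption and may differ across PyTorch versions.
from torch.ao.quantization.fx.quantization_patterns import get_native_quant_patterns

patterns = get_native_quant_patterns()
# Longer (fused) patterns come first, so e.g. a (BatchNorm, Conv) pattern is
# matched before a bare Conv pattern when iterating over the dict.
for pattern, handler_cls in list(patterns.items())[:5]:
    print(pattern, handler_cls)
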
Example #2
File: convert.py  Project: yuguo68/pytorch
def convert(
        model: GraphModule, is_reference: bool = False,
        convert_custom_config_dict: Optional[Dict[str, Any]] = None,
        is_standalone_module: bool = False,
        _remove_qconfig_flag: bool = True,
        convert_qconfig_dict: Optional[Dict[str, Any]] = None,
        backend_config_dict: Optional[Dict[str, Any]] = None) -> torch.nn.Module:
    """
    We will convert an observed model (a module with observer calls) to a reference
    quantized model, the rule is simple:
    1. for each observer module call in the graph, we'll convert it to calls to
       quantize and dequantize functions based on the observer instance
    2. for weighted operations like linear/conv, we need to convert them to reference
       quantized module, this requires us to know whether the dtype configured for the
       weight is supported in the backend, this is done in prepare step and the result
       is stored in observed_node_names, we can decide whether we need to swap the
       module based on this set

    standalone_module means it a submodule that is not inlined in
    parent module, and will be quantized separately as one unit.

    Returns a quantized standalone module, whether input/output is quantized is
    specified by prepare_custom_config_dict, with
    input_quantized_idxs, output_quantized_idxs, please
    see docs for prepare_fx for details
    """
    if convert_custom_config_dict is None:
        convert_custom_config_dict = {}
    node_name_to_scope, prepare_custom_config_dict, observed_node_names = restore_state(model)
    qconfig_map: Dict[str, QConfigAny] = model._qconfig_map  # type: ignore[assignment]

    # TODO this should be removed now that GPU support for quantization is being added.
    # however in practice, as of 7/22/2021, certain functions that get called by convert expect
    # only cpu arguments.
    # As an example, in TestQuantizeFxModels.test_qat_functional_linear when device='cuda',
    # fold_weight will call quantized::linear_prepack which doesn't support QuantizedCuda backend.
    if not is_reference:
        model.cpu()

    # mapping from fully qualified module name to module instance
    # for example,
    # {
    #   '': Model(...),
    #   'linear': Linear(...),
    #   'linear.weight_fake_quant': PerChannelMinMaxObserver(...),
    # }
    # We use remove_duplicate=False here because torch.cat uses
    # the same activation_post_process module instance but different names
    modules = dict(model.named_modules(remove_duplicate=False))

    # TODO refactor this code once we update the prepare logic to have additional information on
    # which graph nodes have been observed and share that with convert to decide which observers to ignore.
    if convert_qconfig_dict:
        prepare_qconfig_dict: Dict[str, Dict[Any, Any]] = model._qconfig_dict  # type: ignore[assignment]
        modules_copy = copy.deepcopy(modules)
        convert_dict_to_ordered_dict(convert_qconfig_dict)
        if model._is_qat:
            convert_qconfig_dict = update_qconfig_for_qat(convert_qconfig_dict, {})
        convert_qconfig_dict = update_qconfig_for_fusion(model, convert_qconfig_dict)

        compare_prepare_convert_qconfig_dict(prepare_qconfig_dict, convert_qconfig_dict)  # type: ignore[arg-type]
        convert_qconfig_map = generate_qconfig_map(model, modules_copy, model.graph, convert_qconfig_dict, node_name_to_scope)
        # check the convert_qconfig_map generated and ensure that all the values either match what was set in prepare qconfig_map
        # or are set to None in the convert_qconfig_map.
        for k, v in qconfig_map.items():
            assert k in convert_qconfig_map, 'Expected key {} in convert qconfig_map'.format(k)
            if convert_qconfig_map[k] is not None:
                assert qconfig_equals(v, convert_qconfig_map[k]), 'Expected k {} to have the same value in prepare qconfig_dict \
                and convert qconfig_dict, found {} updated to {}.'.format(k, v, convert_qconfig_map[k])
        qconfig_map = convert_qconfig_map

    custom_module_classes = get_custom_module_class_keys(
        convert_custom_config_dict,
        "observed_to_quantized_custom_module_class")
    custom_module_class_mapping = convert_custom_config_dict.get("observed_to_quantized_custom_module_class", {})

    if model._equalization_qconfig_map is not None:
        # If we want to do equalization then do the following:
        # Calculate the equalization scale, update the observers with the scaled
        # inputs, and scale the weight
        weight_eq_obs_dict = update_obs_for_equalization(model, modules)
        convert_eq_obs(model, modules, weight_eq_obs_dict)

    # always run weight observers in the top level forward method
    # for dynamic quant ops or weight only quant ops
    run_weight_observers(model)

    graph_inputs: List[str] = []
    for node in model.graph.nodes:
        if node.op == 'placeholder':
            graph_inputs.append(node.name)

    # TODO: move this outside of this function
    def replace_observer_with_quantize_dequantize_node(
            model: torch.nn.Module,
            graph: Graph,
            node: Node,
            modules: Dict[str, torch.nn.Module],
            node_name_to_scope: Dict[str, Tuple[str, type]],
            qconfig_map: Dict[str, QConfigAny]) -> None:
        """ Replace activation_post_process module call node with quantize and
        dequantize node

        Before:
        ... -> observer_0(x) -> ...
        After:
        ... -> torch.quantize_per_tensor(x, ...) -> x.dequantize() -> ...
        """
        assert modules is not None
        assert isinstance(node.target, str)
        module_path, prefix = get_module_path_and_prefix(node, node_name_to_scope, qconfig_map)
        observer_module = modules[node.target]
        maybe_quantize_node_info = get_quantize_node_info(observer_module)
        # Skip replacing observers to quant/dequant nodes if the qconfigs of all
        # consumers and producers of this observer are None
        skip_replacement = all([
            has_none_qconfig(n, qconfig_map) for n in
            list(node.args) + list(node.users.keys())])
        if skip_replacement or maybe_quantize_node_info is None:
            # didn't find a corresponding quantize op and info for the observer_module
            # so we just remove the observer
            with graph.inserting_before(node):
                node.replace_all_uses_with(node.args[0])
                graph.erase_node(node)
        else:
            # otherwise, convert the observer module call to a quantize/dequantize node
            node_type, quantize_op, qparams = maybe_quantize_node_info
            # replace observer node with quant - dequant node
            with graph.inserting_before(node):
                input_node = node.args[0]
                inputs = [input_node]
                for key, value in qparams.items():
                    # TODO: we can add the information of whether a value needs to
                    # be registered as an attribute in qparams dict itself
                    if key in ['_scale_', '_zero_point_']:
                        # For scale and zero_point values we register them as buffers in the root module.
                        # TODO: maybe need more complex attr name here
                        qparam_node = create_getattr_from_value(model, graph, module_path + prefix + key, value)
                        inputs.append(qparam_node)
                    else:
                        # for qparams that are not scale/zero_point (like axis, dtype) we store them as literals in the graph.
                        inputs.append(value)

                quantized_node = graph.create_node(node_type, quantize_op, tuple(inputs), {})
                dequantized_node = graph.call_method("dequantize", args=(quantized_node,))
                node.replace_all_uses_with(dequantized_node)
                graph.erase_node(node)

    # this is a temporary hack for custom module, we may want to implement
    # this properly after the custom module class design is finalized
    def replace_observer_with_dequantize_node(node: Node, graph: Graph):
        call_custom_module_node = node.args[0]
        assert isinstance(call_custom_module_node, Node), \
            f"Expected the custom module call node to be a Node, but got {call_custom_module_node}"
        node.replace_all_uses_with(call_custom_module_node)
        graph.erase_node(node)
        insert_dequantize_node(call_custom_module_node, graph)

    # additional state to override inputs to be quantized, if specified
    # by the user
    placeholder_node_seen_cnt = 0
    input_quantized_idxs: List[int] = prepare_custom_config_dict.get(
        "input_quantized_idxs", [])
    output_quantized_idxs: List[int] = prepare_custom_config_dict.get(
        "output_quantized_idxs", [])

    if backend_config_dict is None:
        backend_config_dict = get_native_backend_config_dict()
    root_module_to_quantized_reference_module = get_root_module_to_quantized_reference_module(backend_config_dict)
    # convert to a tuple so it can be used with isinstance(module, tuple_of_classes)
    root_module_classes = tuple(root_module_to_quantized_reference_module.keys())
    qat_module_classes = get_qat_module_classes(backend_config_dict)
    fused_module_classes = get_fused_module_classes(backend_config_dict)
    statically_quantized_custom_module_nodes: Set[Node] = set()

    for node in list(model.graph.nodes):
        if node.op == 'placeholder':
            cur_placeholder_node_idx = placeholder_node_seen_cnt
            placeholder_node_seen_cnt += 1
            if cur_placeholder_node_idx in input_quantized_idxs:
                # Inputs are assumed to be quantized if the user specified the
                # input_quantized_idxs override.
                # we need to dequantize the inputs since all operators took
                # floating point inputs in reference quantized models
                insert_dequantize_node(node, model.graph)
        elif node.op == "output":
            # If the argument is empty we don't need to do anything
            if len(output_quantized_idxs) == 0:
                continue
            # Results are kept quantized if the user specified the
            # output_quantized_idxs override.
            # Remove the dequantize operator for the node in the end if any
            return_node = node
            output = node.args[0]
            # outputs can be Node, list, tuple, dict, other cases are not supported yet
            if isinstance(output, (list, tuple)):
                for idx in output_quantized_idxs:
                    maybe_recursive_remove_dequantize(output[idx], return_node, model.graph)
            elif isinstance(output, (Node, dict)):
                # we treat dict as a single argument currently, but it can be extended
                # to support {"key": dtype} after we change output_quantized_idxs to
                # dict
                if 0 in output_quantized_idxs:
                    maybe_recursive_remove_dequantize(output, return_node, model.graph)
            else:
                warnings.warn(f"Unsupported node type for output_quantized_idxs: {type(output)}")
        elif node.op == "call_module":
            if is_activation_post_process(modules[node.target]):
                observed_node = node.args[0]
                if observed_node in statically_quantized_custom_module_nodes:
                    replace_observer_with_dequantize_node(node, model.graph)
                else:
                    replace_observer_with_quantize_dequantize_node(
                        model, model.graph, node, modules, node_name_to_scope,
                        qconfig_map)
            elif is_observed_standalone_module(modules[node.target]):
                convert_standalone_module(
                    node, modules, model, is_reference, backend_config_dict)
            elif type(modules[node.target]) in set(
                    root_module_classes).union(qat_module_classes).union(fused_module_classes):
                # extra check for fused module classes to make sure they are fused module classes
                # of target modules
                if type(modules[node.target]) in fused_module_classes and \
                   type(modules[node.target][0]) not in root_module_classes:
                    continue
                convert_weighted_module(
                    node, modules, observed_node_names, qconfig_map, backend_config_dict)
            elif type(modules[node.target]) in custom_module_classes:
                convert_custom_module(
                    node, model.graph, modules, custom_module_class_mapping,
                    statically_quantized_custom_module_nodes)

    preserved_attributes = set(convert_custom_config_dict.get("preserved_attributes", []))
    model = QuantizedGraphModule(model, copy.deepcopy(model.graph), preserved_attributes)

    # remove dead code after converting observers to quant/dequant ops
    model.graph.eliminate_dead_code()
    model.recompile()

    # TODO: maybe move this to quantize_fx.py
    if not is_reference:
        model = duplicate_dequantize_node(model)
        model = duplicate_quantize_dynamic_node(model)
        model = lower_to_fbgemm(model, qconfig_map, node_name_to_scope)
        model = remove_quant_dequant_pairs(model)
        model = remove_extra_dequantize(model)
    # TODO: this looks hacky, we want to check why we need this and see if we can
    # remove this
    # removes qconfig and activation_post_process modules
    if _remove_qconfig_flag:
        _remove_qconfig(model)
    return model
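
For context, this convert function is the internal step that the public convert_fx API eventually reaches. Below is a minimal end-to-end sketch using the public FX quantization API, assuming PyTorch ~1.11/1.12 signatures (newer releases also require example_inputs in prepare_fx); the toy model and calibration loop are illustrative assumptions:

import torch
from torch.ao.quantization import get_default_qconfig
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx

# Toy float model and qconfig; both are illustrative assumptions.
float_model = torch.nn.Sequential(torch.nn.Linear(16, 8), torch.nn.ReLU()).eval()
qconfig_dict = {"": get_default_qconfig("fbgemm")}

prepared = prepare_fx(float_model, qconfig_dict)   # inserts observers
with torch.no_grad():
    for _ in range(4):                             # toy calibration loop
        prepared(torch.randn(2, 16))
quantized = convert_fx(prepared)                   # internally reaches a convert() like the one above
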
Example #3
def get_base_name_to_sets_of_related_ops() -> Dict[str, Set[NSNodeTargetType]]:
    # note: these sets are extended below with items from backend_config_dict
    sets_of_related_ops: List[Set[NSNodeTargetType]] = [
        # conv modules
        set([
            nn.Conv1d,
        ]),
        set([
            nn.Conv2d,
        ]),
        set([
            nn.Conv3d,
        ]),
        # conv functionals
        set([
            F.conv1d,
        ]),
        set([
            F.conv2d,
        ]),
        set([
            F.conv3d,
        ]),
        # linear modules
        set([
            nn.Linear,
        ]),
        # linear functionals
        set([
            F.linear,
        ]),
        # average pool
        set([
            nn.AvgPool1d,
            torch.avg_pool1d,
        ]),
        set([
            nn.AvgPool2d,
            torch._C._nn.avg_pool2d,
        ]),
        set([
            nn.AvgPool3d,
            torch._C._nn.avg_pool3d,
        ]),
        # adaptive average pool
        set([
            nn.AdaptiveAvgPool1d,
            F.adaptive_avg_pool1d,
        ]),
        set([
            nn.AdaptiveAvgPool2d,
            F.adaptive_avg_pool2d,
        ]),
        set([
            nn.AdaptiveAvgPool3d,
            F.adaptive_avg_pool3d,
        ]),
        # LSTM
        set([
            nn.LSTM,
        ]),
        # add
        set([
            torch.add,
            operator.add,  # x + y
        ]),
        # cat
        set([
            torch.cat,
        ]),
        # mul
        set([
            torch.mul,
            operator.mul,
        ]),
        # relu
        set([
            F.relu,
            nn.ReLU,
            'relu',
            'relu_',
            torch.relu,
        ]),
        # maxpool
        set([
            nn.MaxPool1d,
            F.max_pool1d,
        ]),
        set([
            nn.MaxPool2d,
            F.max_pool2d,
        ]),
        set([
            nn.MaxPool3d,
            F.max_pool3d,
        ]),
        # sigmoid
        set([
            torch.sigmoid,
            'sigmoid',
            'sigmoid_',
            nn.Sigmoid,
            F.sigmoid,
        ]),
        # BatchNorm
        set([
            nn.BatchNorm2d,
        ]),
        set([
            nn.BatchNorm3d,
        ]),
        # ConvTranspose
        set([
            nn.ConvTranspose1d,
        ]),
        set([
            nn.ConvTranspose2d,
        ]),
        set([
            nn.ConvTranspose3d,
        ]),
        # ELU
        set([
            nn.ELU,
        ]),
        # Embedding
        set([
            nn.Embedding,
        ]),
        # EmbeddingBag
        set([
            nn.EmbeddingBag,
        ]),
        # GroupNorm
        set([
            nn.GroupNorm,
        ]),
        # Hardswish
        set([
            nn.Hardswish,
        ]),
        # InstanceNorm
        set([
            nn.InstanceNorm1d,
        ]),
        set([
            nn.InstanceNorm2d,
        ]),
        set([
            nn.InstanceNorm3d,
        ]),
        # LayerNorm
        set([
            nn.LayerNorm,
        ]),
        # LeakyReLU
        set([
            nn.LeakyReLU,
        ]),
        # ReLU6
        set([
            nn.ReLU6,
            F.relu6,
        ]),
        # F.elu
        set([
            F.elu,
        ]),
        # F.hardswish
        set([
            F.hardswish,
        ]),
        # F.instance_norm
        set([
            F.instance_norm,
        ]),
        # F.layer_norm
        set([
            F.layer_norm,
        ]),
        # F.leaky_relu
        set([
            F.leaky_relu,
        ]),
        # F.silu
        set([
            nn.SiLU,
            F.silu,
        ]),
        # F.mish
        set([
            nn.Mish,
            F.mish,
        ]),
        # F.tanh
        set([
            nn.Tanh,
            F.tanh,
            torch.tanh,
            'tanh_',
            'tanh',
        ]),
        # F.hardsigmoid
        set([
            'hardsigmoid_',
            'hardsigmoid',
            F.hardsigmoid,
            nn.Hardsigmoid,
        ]),
        # F.hardtanh
        set([
            nn.Hardtanh,
            F.hardtanh,
            F.hardtanh_,
        ]),
        # floordiv
        set([
            operator.floordiv,
        ]),
        # unsqueeze
        set([
            torch.unsqueeze,
        ]),
        # stack
        set([
            torch.stack,
        ]),
        # squeeze
        set([
            torch.squeeze,
        ]),
        # sort
        set([
            torch.sort,
        ]),
        # repeat_interleave
        set([
            torch.repeat_interleave,
        ]),
        # min
        set([
            torch.min,
        ]),
        # mean
        set([
            torch.mean,
        ]),
        # max
        set([
            torch.max,
        ]),
        # transpose
        set([
            torch.transpose,
        ]),
        # flatten
        set([
            torch.flatten,
        ]),
        # clamp
        set([
            torch.clamp,
        ]),
        # chunk
        set([
            torch.chunk,
        ]),
        # interpolate
        set([
            torch.nn.functional.interpolate,
        ]),
        # dropout
        set([
            nn.Dropout,
        ]),
        # F.dropout
        set([
            F.dropout,
        ]),
        # matmul
        set([
            torch.matmul,
        ]),
        # Softmax
        set([
            nn.Softmax,
        ]),
    ]

    # for each floating point op, add versions of the op added by
    # backend_config_dict
    backend_config_dict = get_native_backend_config_dict()

    new_connections = [
        # technical debt edge case
        (nn.Linear, nn.modules.linear.NonDynamicallyQuantizableLinear),
    ]

    for config in backend_config_dict['configs']:

        if 'pattern' not in config:
            continue

        # format: (c, (b, a))
        pattern = config['pattern']
        first_element = pattern
        # look from the end, because pattern is in reverse order
        while isinstance(first_element, (list, tuple)):
            first_element = first_element[-1]

        if 'fused_module' in config:
            # case 1: pattern fuses a pattern of ops into an op
            # example: nn.Conv1d, nn.ReLU fused into nni.ConvReLU1d
            new_connections.append((first_element, config['fused_module']))

        if 'qat_module' in config:
            # case 2: pattern swaps a module into a QAT module
            # example: nni.ConvReLU1d swapped into nniqat.ConvReLU1d
            new_connections.append((first_element, config['qat_module']))

        if 'reference_quantized_module_for_root' in config:
            # case 3: reference version of floating point module, such as
            # nn.Conv2d and nnqr.Conv2d
            new_connections.append(
                (first_element, config['reference_quantized_module_for_root']))

    #
    # Add reference module swaps from default lowering path
    #

    for source_to_target in (
            _lower_to_native_backend.STATIC_LOWER_MODULE_MAP,
            _lower_to_native_backend.DYNAMIC_LOWER_MODULE_MAP,
            _lower_to_native_backend.WEIGHT_ONLY_LOWER_MODULE_MAP,
            _lower_to_native_backend.SPECIAL_PATTERN_LOWER_MODULE_MAP,
    ):
        for source, target in source_to_target.items():  # type: ignore[attr-defined]
            new_connections.append((source, target))

    for source_to_double_target in (
            _lower_to_native_backend.STATIC_LOWER_FUSED_MODULE_MAP,
            _lower_to_native_backend.DYNAMIC_LOWER_FUSED_MODULE_MAP,
    ):
        for source, (target1, target2) in source_to_double_target.items():  # type: ignore[attr-defined]
            new_connections.append((source, target1))
            new_connections.append((source, target2))

    #
    # Add function swaps from default lowering path
    #

    for source, (target1, target2) in \
            _lower_to_native_backend.STATIC_LOWER_FUNCTIONAL_MAP.items():
        new_connections.append((source, target1))
        new_connections.append((source, target2))

    for source_to_target in (
            _lower_to_native_backend.QBIN_OP_MAPPING,
            _lower_to_native_backend.QBIN_RELU_OP_MAPPING,
            quantization_mappings.DEFAULT_FLOAT_TO_QUANTIZED_OPERATOR_MAPPINGS,
    ):
        for source, target in source_to_target.items():
            new_connections.append((source, target))

    #
    # Add other swaps, ideally in the future this could be removed
    # after the lowering code stops using these.
    #
    for source_to_target in (
            quantization_mappings.DEFAULT_DYNAMIC_QUANT_MODULE_MAPPINGS, ):
        for source, target in source_to_target.items():
            new_connections.append((source, target))

    # add the new connections from backend_config_dict
    for item1, item2 in new_connections:
        for set_of_related_ops in sets_of_related_ops:
            if item1 in set_of_related_ops or item2 in set_of_related_ops:
                set_of_related_ops.add(item1)
                set_of_related_ops.add(item2)
                break

    base_name_to_sets_of_related_ops: Dict[str, Set[NSNodeTargetType]] = {}

    counter = 0
    for set_of_related_ops in sets_of_related_ops:
        base_name = str(counter)
        counter += 1
        base_name_to_sets_of_related_ops[base_name] = set_of_related_ops

    return base_name_to_sets_of_related_ops
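
As a quick check, the returned mapping can be searched to see which related-op set a given op falls into. A hedged sketch (the helper function below is illustrative, not part of the original file):

import torch.nn as nn

base_name_to_sets = get_base_name_to_sets_of_related_ops()

def related_ops_for(op):
    # Return (base_name, set_of_related_ops), or (None, empty set) if not found.
    for base_name, related in base_name_to_sets.items():
        if op in related:
            return base_name, related
    return None, set()

base_name, related = related_ops_for(nn.Conv2d)
print(base_name, sorted(str(x) for x in related))
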
Example #4
import os.path


# Create a directory for the output, if it doesn't exist
QUANTIZATION_BACKEND_CONFIG_IMAGE_PATH = os.path.join(
    os.path.realpath(os.path.join(__file__, "..")),
    "quantization_backend_configs"
)

if not os.path.exists(QUANTIZATION_BACKEND_CONFIG_IMAGE_PATH):
    os.mkdir(QUANTIZATION_BACKEND_CONFIG_IMAGE_PATH)

output_path = os.path.join(QUANTIZATION_BACKEND_CONFIG_IMAGE_PATH, "default_backend_config.txt")

with open(output_path, "w") as f:
    native_backend_config_dict = get_native_backend_config_dict()

    configs = native_backend_config_dict['configs']

    def _sort_key_func(entry):
        pattern = entry['pattern']
        while isinstance(pattern, tuple):
            pattern = pattern[-1]

        pattern = remove_boolean_dispatch_from_name(pattern)
        if not isinstance(pattern, str):
            # methods are already strings
            pattern = torch.typename(pattern)

        # we want
        #
Example #5
"""
This script will generate default values of quantization configs.
These are for use in the documentation.
"""

from torch.ao.quantization.backend_config import get_native_backend_config_dict
import os.path
from pprint import pprint


# Create a directory for the output, if it doesn't exist
QUANTIZATION_BACKEND_CONFIG_IMAGE_PATH = os.path.join(
    os.path.realpath(os.path.join(__file__, "..")),
    "quantization_backend_configs"
)

if not os.path.exists(QUANTIZATION_BACKEND_CONFIG_IMAGE_PATH):
    os.mkdir(QUANTIZATION_BACKEND_CONFIG_IMAGE_PATH)

output_path = os.path.join(QUANTIZATION_BACKEND_CONFIG_IMAGE_PATH, "default_backend_config.txt")

with open(output_path, "w") as f:
    pprint(get_native_backend_config_dict(), stream=f)
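
Besides dumping the whole dict with pprint, the config can also be inspected programmatically. A hedged sketch reusing the import above; it assumes the dict carries a 'configs' list of per-pattern entries (as iterated in Examples #3 and #4), while the 'name' key is an assumption that may not exist in every PyTorch version:

cfg = get_native_backend_config_dict()
print(cfg.get("name"))                  # backend name, if present (assumption)
for entry in cfg["configs"][:3]:
    # each entry describes one pattern plus its dtype/module configuration
    print(entry.get("pattern"), sorted(str(k) for k in entry.keys()))
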