Example #1
    def test_input_weight_equalization_convert(self):
        """ Tests that the modified model for equalization (before quantization)
        returns the same output as the original model
        """

        tests = [(SingleLayerLinearModel, 2), (LinearAddModel, 2),
                 (TwoLayerLinearModel, 2),
                 (SingleLayerFunctionalLinearModel, 2),
                 (FunctionalLinearAddModel, 2),
                 (TwoLayerFunctionalLinearModel, 2), (LinearReluModel, 2),
                 (LinearReluLinearModel, 2), (LinearReluAddModel, 2),
                 (FunctionalLinearReluModel, 2),
                 (FunctionalLinearReluLinearModel, 2), (ConvModel, 4),
                 (TwoLayerConvModel, 4), (SingleLayerFunctionalConvModel, 4),
                 (TwoLayerFunctionalConvModel, 4), (ConvReluModel, 4),
                 (ConvReluConvModel, 4), (ConvReluAddModel, 4),
                 (FunctionalConvReluModel, 4),
                 (FunctionalConvReluConvModel, 4)]

        for (M, ndim) in tests:
            m = M().eval()

            if ndim == 2:
                x = torch.rand((5, 5))
            elif ndim == 4:
                x = torch.rand((16, 3, 224, 224))

            prepared = prepare_fx(
                copy.deepcopy(m),
                specific_qconfig_dict,
                equalization_qconfig_dict=default_equalization_qconfig_dict)
            output = prepared(x)

            convert_ref = _convert_equalization_ref(prepared)
            convert_ref_output = convert_ref(x)

            prepared = prepare_fx(
                m,
                specific_qconfig_dict,
                equalization_qconfig_dict=default_equalization_qconfig_dict)
            prepared(x)
            convert_fx(prepared)  # smoke test: check that conversion runs
            self.assertEqual(output, convert_ref_output)
Example #2
    def test_simple_mod(self):
        m = nn.Sequential(nn.Conv2d(1, 1, 1)).eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)
        results = get_matching_subgraph_pairs(mp, mq)

        expected_types = {'0': ((nn.Conv2d, nn.Conv2d), (nnq.Conv2d, nnq.Conv2d))}
        self.assert_types_for_matched_subgraph_pairs(results, expected_types, mp, mq)
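The pattern above is the core FX workflow used throughout these examples: prepare_fx inserts observers, convert_fx produces the quantized module, and get_matching_subgraph_pairs aligns nodes between the two graphs. A minimal standalone sketch of the same flow with an explicit calibration step (a sketch under the usual imports; the model and input shape are illustrative):

    import copy
    import torch
    import torch.nn as nn
    from torch.quantization import default_qconfig
    from torch.quantization.quantize_fx import prepare_fx, convert_fx

    m = nn.Sequential(nn.Conv2d(1, 1, 1)).eval()
    mp = prepare_fx(m, {'': default_qconfig})
    mp(torch.randn(2, 1, 2, 2))         # calibrate: observers record activation ranges
    mq = convert_fx(copy.deepcopy(mp))  # quantize a copy; mp keeps its observers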
Example #3
 def saveDQ(self):
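     # object_type maps module classes to qconfigs: nn.Embedding gets
     # weight-only quantization; nn.LSTM and nn.Linear get dynamic
     # quantization, which needs no calibration pass before convert_fx.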
     qconfig_dict = {
         "object_type": [
             (nn.Embedding, float_qparams_weight_only_qconfig),
             (nn.LSTM, default_dynamic_qconfig),
             (nn.Linear, default_dynamic_qconfig)
         ]
     }
     prepared_model = prepare_fx(self.input_net, qconfig_dict)
     quantized_model = convert_fx(prepared_model)
     torch.jit.save(self._freeze_jit(quantized_model), self.dq_output_file)
Example #4
 def test_mobilenet_v2(self):
     # verify that the mobilenet_v2 graph can be matched
     import torchvision
     m = torchvision.models.__dict__['mobilenet_v2'](pretrained=False).eval().float()
     mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
     # TODO(future PR): prevent the need for copying here, we can copy the
     # modules but should reuse the underlying tensors
     mp_copy = copy.deepcopy(mp)
     mq = convert_fx(mp_copy)
     # assume success if no exceptions
     results = get_matching_subgraph_pairs(mp, mq)
Example #5
    def test_input_weight_equalization_prepare(self):
        """ Tests that graphs created after prepare_fx is as expected
        """

        linear_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 1,
            ns.call_module(MinMaxObserver): 2,
        }

        linear2_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 2,
            ns.call_module(MinMaxObserver): 3,
        }

        functionalLinear_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 1,
            ns.call_module(_WeightEqualizationObserver): 1,
            ns.call_module(MinMaxObserver): 3,
        }

        functionalLinear2_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 2,
            ns.call_module(_WeightEqualizationObserver): 2,
            ns.call_module(MinMaxObserver): 5,
        }

        functionalLinear2Add_node_occurrence = {
            ns.call_module(_InputEqualizationObserver): 2,
            ns.call_module(_WeightEqualizationObserver): 2,
            ns.call_module(MinMaxObserver): 6,
        }

        tests = [
            (SingleLayerLinearModel, linear_node_occurrence),
            (TwoLayerLinearModel, linear2_node_occurrence),
            (SingleLayerFunctionalLinearModel,
             functionalLinear_node_occurrence),
            (TwoLayerFunctionalLinearModel, functionalLinear2_node_occurrence),
            (FunctionalLinearAddModel, functionalLinear2Add_node_occurrence),
            (LinearReluModel, linear_node_occurrence),
            (LinearReluLinearModel, linear2_node_occurrence),
            (FunctionalLinearReluModel, functionalLinear_node_occurrence),
            (FunctionalLinearReluLinearModel,
             functionalLinear2_node_occurrence)
        ]

        for (M, node_occurrence) in tests:
            m = M().eval()
            prepared = prepare_fx(
                m,
                qconfig_dict,
                equalization_qconfig_dict=default_equalization_qconfig_dict)
            self.checkGraphModuleNodes(
                prepared, expected_node_occurrence=node_occurrence)
Example #6
 def test_simple_mod_multi(self):
     m = nn.Sequential(
         nn.Sequential(nn.Conv2d(1, 1, 1), ),
         nn.Conv2d(1, 1, 1),
     ).eval()
     mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
     # TODO(future PR): prevent the need for copying here, we can copy the
     # modules but should reuse the underlying tensors
     mp_copy = copy.deepcopy(mp)
     mq = convert_fx(mp_copy)
     # assume success if no exceptions
     results = get_matching_subgraph_pairs(mp, mq)
Example #7
    def test_match_activations_fun(self):
        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.w1 = nn.Parameter(torch.Tensor(4, 4))
                self.b1 = nn.Parameter(torch.zeros(4))
                self.w2 = nn.Parameter(torch.Tensor(4, 4))
                self.b2 = nn.Parameter(torch.zeros(4))
                torch.nn.init.kaiming_uniform_(self.w1, a=math.sqrt(5))
                torch.nn.init.kaiming_uniform_(self.w2, a=math.sqrt(5))

            def forward(self, x):
                x = F.linear(x, self.w1, self.b1)
                x = F.linear(x, self.w2, self.b2)
                x = F.relu(x)
                return x

        m = M().eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        mp(torch.randn(4, 4))
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)

        mp_ns, mq_ns = prepare_model_outputs('fp32_prepared', mp, 'int8', mq,
                                             OutputLogger)

        expected_occurrence = {
            ns.call_module(OutputLogger): 2,
        }
        self.checkGraphModuleNodes(
            mp_ns, expected_node_occurrence=expected_occurrence)
        self.checkGraphModuleNodes(
            mq_ns, expected_node_occurrence=expected_occurrence)

        # TODO(before land): test both scripted and non-scripted
        mp_ns = torch.jit.script(mp_ns)
        mq_ns = torch.jit.script(mq_ns)

        # calibrate
        input_fp32 = torch.randn(4, 4)
        mp_ns(input_fp32)
        mq_ns(input_fp32)

        # check activation result correctness
        act_compare_dict = get_matching_activations('fp32_prepared', mp_ns,
                                                    'int8', mq_ns,
                                                    OutputLogger)
        self.assertTrue(len(act_compare_dict) == 2)
        self.assert_ns_logger_act_compare_dict_valid(act_compare_dict)
Example #8
    def test_simple_fusion(self):
        m = LinearReluFunctional().eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)
        results = get_matching_subgraph_pairs(mp, mq)

        expected_types = {
            'base_op_torch.nn.functional.linear_0':
                ((F.linear, F.relu), (toq.linear_relu, toq.linear_relu)),
        }
        self.assert_types_for_matched_subgraph_pairs(results, expected_types, mp, mq)
Example #9
def quantize_fx(model, inputs, data_loader, dynamic=True):

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):

        static = not dynamic

        if dynamic:
            qconfig = per_channel_dynamic_qconfig
        else:
            qconfig = QConfig(
                activation=HistogramObserver.with_args(reduce_range=False),
                weight=default_weight_observer,
            )

        # quantize only nn.Linear layers
        qconfig_dict = {"": None}
        qconfig_dict["object_type"] = [(torch.nn.Linear, qconfig)]

        def calibrate(model, loader, max_samples=-1):
            model.eval()
            with torch.no_grad():
                for (idx, d) in enumerate(loader):
                    print("Running sample input #" + str(idx))
                    model(d[1]["tokens"])
                    if idx == max_samples:
                        break

        prepared_model = prepare_fx(
            model.encoder.encoder.transformer.layers.layers,
            qconfig_dict)  # fuse modules and insert observers

        model.encoder.encoder.transformer.layers.layers = prepared_model
        if static:
            calibrate(model, data_loader)  # run calibration on sample data
        model.encoder.encoder.transformer.layers.layers = convert_fx(
            prepared_model)

        # Trace the submodule in order to fix the interface
        if static:
            input1 = torch.randn([2, 1, 1024], dtype=torch.float)
            input2 = torch.randn([1, 2]).bool()
            traced = torch.jit.trace(
                model.encoder.encoder.transformer.layers.layers,
                (input1, input2))
            model.encoder.encoder.transformer.layers.layers = traced

        # Trace the overall module
        trace = model.trace(inputs)

        return trace
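A hypothetical invocation of the helper above, assuming a PyText-style model whose encoder wraps a RoBERTaEncoder and a data_loader that yields batches matching the d[1]["tokens"] access in calibrate (both are assumptions about the caller, not shown here):

    # dynamic=False exercises the static path: observe, calibrate, convert, trace
    traced = quantize_fx(model, inputs, data_loader, dynamic=False)
    torch.jit.save(traced, "encoder_int8.pt")  # hypothetical output path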
Example #10
 def _test_extract_weights(self, m, results_len=0, qconfig_dict=None):
     if qconfig_dict is None:
         qconfig_dict = {'': torch.quantization.default_qconfig}
     mp = prepare_fx(m, qconfig_dict)
     # TODO(future PR): prevent the need for copying here, we can copy the
     # modules but should reuse the underlying tensors
     mp_copy = copy.deepcopy(mp)
     mq = convert_fx(mp_copy)
     results = extract_weights('fp32_prepared', mp, 'int8', mq)
     self.assertTrue(
         len(results) == results_len,
         f"expected len {results_len}, got len {len(results)}")
     self.assert_ns_compare_dict_valid(results)
     return results
Example #11
def prepare_model_outputs_fx(prepared_float_module,
                             q_module,
                             Logger=OutputLogger,
                             allow_list=None):
    r"""Prepare the model by attaching the logger to both float module (after prepare)
    and quantized module if they are in the allow_list.

    Args:
        prepared_float_module: float module after prepare
        q_module: module quantized from float_module
        Logger: type of logger to be attached to float_module and q_module
        allow_list: list of module types to attach logger
    """
    torch._C._log_api_usage_once(
        "quantization_api._numeric_suite.prepare_model_outputs_fx")
    if allow_list is None:
        allow_list = get_default_compare_output_module_list()

    prepared_float_module = remove_qconfig_observer_fx(prepared_float_module)

    qconfig_debug = torch.quantization.QConfig(activation=Logger, weight=None)
    qconfig_dict = {"": qconfig_debug}

    additional_quant_patterns = {}

    for module in allow_list:
        additional_quant_patterns[module] = NumericSuiteQuantizeHandler

    prepare_custom_config_dict = {
        "additional_quant_pattern": additional_quant_patterns
    }

    prepared_float_module = prepare_fx(prepared_float_module, qconfig_dict,
                                       prepare_custom_config_dict)
    q_module = prepare_fx(q_module, qconfig_dict, prepare_custom_config_dict)

    return prepared_float_module, q_module
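A minimal usage sketch for the helper above (assuming mp is a module returned by prepare_fx and mq its convert_fx counterpart, as in the tests elsewhere on this page; the input shape is illustrative):

    mp_ns, mq_ns = prepare_model_outputs_fx(mp, mq)  # attach OutputLogger to both
    data = torch.randn(2, 1, 2, 2)
    mp_ns(data)  # populate the float model's loggers
    mq_ns(data)  # populate the quantized model's loggers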
Example #12
    def _test_match_activations(
        self,
        m,
        data,
        prepared_expected_node_occurrence=None,
        results_len=0,
        should_log_inputs=False,
        qconfig_dict=None,
        skip_scripting=False,
    ):
        if qconfig_dict is None:
            qconfig_dict = {'': torch.quantization.default_qconfig}
        mp = prepare_fx(m, qconfig_dict)
        mp(*data)
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)

        mp_ns, mq_ns = add_loggers('fp32_prepared',
                                   mp,
                                   'int8',
                                   mq,
                                   OutputLogger,
                                   should_log_inputs=should_log_inputs)

        if prepared_expected_node_occurrence:
            self.checkGraphModuleNodes(
                mp_ns,
                expected_node_occurrence=prepared_expected_node_occurrence)
            self.checkGraphModuleNodes(
                mq_ns,
                expected_node_occurrence=prepared_expected_node_occurrence)

        if not skip_scripting:
            mp_ns = torch.jit.script(mp_ns)
            mq_ns = torch.jit.script(mq_ns)

        # calibrate
        mp_ns(*data)
        mq_ns(*data)

        # check activation result correctness
        act_compare_dict = extract_logger_info(mp_ns, mq_ns, OutputLogger)
        self.assertTrue(
            len(act_compare_dict) == results_len,
            f"expected len {results_len}, got len {len(act_compare_dict)}")
        self.assert_ns_compare_dict_valid(act_compare_dict)
        return act_compare_dict
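A hypothetical call to this helper from a test, assuming a ConvModel-style module and a 4-d input as used elsewhere on this page (results_len is an assumption about how many matched activations that model yields):

        self._test_match_activations(
            ConvModel().eval(), (torch.randn(1, 3, 4, 4),), results_len=1)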
Example #13
    def test_simple_tensor_ops(self):
        class M(nn.Module):
            def __init__(self):
                super().__init__()

            def forward(self, x, y):
                z = x + y
                return z

        m = M().eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)
        # assume success if no exceptions
        results = get_matching_subgraph_pairs(mp, mq)
Example #14
    def test_nodes_with_equal_types_do_not_get_matched(self):
        # verifies that by default, nodes with equivalent types do not get matched.
        # This is important for user defined types, for which we do not know
        # the weight extraction functions or input type. In the future, this can
        # be made configurable.
        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.conv1 = nn.Conv2d(1, 1, 1)
                self.conv2 = nn.Conv2d(1, 1, 1)

            def forward(self, x):
                x = self.conv1(x)
                x = self.conv2(x)
                x = torch.mul(x, x)
                x = torch.sigmoid(x)
                x = F.relu(x)
                return x

        m = M().eval()
        # prevent conv2 from getting quantized, so we can test
        # modules with equal types
        qconfig_dict = {
            '': torch.quantization.default_qconfig,
            'module_name': [('conv2', None)],
        }
        mp = prepare_fx(m, qconfig_dict)
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)
        results = get_matching_subgraph_pairs(mp, mq)

        # conv2 should not be matched because we disabled quantization for it,
        # so its type is the same in mp and mq. sigmoid should not be matched
        # because it maps to the same function in mp and mq. relu should be
        # matched because it is in the allowlist of functions with the same
        # signature across dtypes.
        expected_types = {
            'base_op_torch.nn.Conv2d_0':
            ((nn.Conv2d, nn.Conv2d), (nnq.Conv2d, nnq.Conv2d)),
            'base_op_torch.mul_0':
            ((torch.mul, torch.mul), (toq.mul, toq.mul)),
            'base_op_torch.relu_0': ((F.relu, F.relu), (F.relu, F.relu)),
        }
        self.assert_types_for_matched_subgraph_pairs(results, expected_types,
                                                     mp, mq)
Example #15
    def test_compare_weights_lstm_dynamic_fx(self):
        r"""Compare the weights of float and dynamic quantized lstm layer"""

        qconfig_dict = {"object_type": [(nn.LSTM, default_dynamic_qconfig)]}

        float_model = LSTMwithHiddenDynamicModel()
        float_model.eval()

        prepared_model = prepare_fx(float_model, qconfig_dict)

        prepared_float_model = copy.deepcopy(prepared_model)
        prepared_float_model.eval()

        q_model = convert_fx(prepared_model)

        expected_weight_dict_keys = {"lstm._all_weight_values.0.param"}
        self.compare_and_validate_model_weights_results_fx(
            prepared_float_model, q_model, expected_weight_dict_keys)
Example #16
    def test_submodule(self):
        # test quantizing complete module, submodule and linear layer
        configs = [
            {},
            {"module_name": [("subm", None)]},
            {"module_name": [("fc", None)]},
        ]
        for config in configs:
            model = LinearModelWithSubmodule().eval()
            qconfig_dict = {
                "": torch.quantization.get_default_qconfig("qnnpack"),
                **config,
            }
            model = prepare_fx(model, qconfig_dict)
            quant = convert_fx(model)

            x = torch.randn(5, 5)
            self._compare_script_and_mobile(quant, input=x)
Example #17
    def test_compare_weights_linear_dynamic_fx(self):
        r"""Compare the weights of float and dynamic quantized linear layer"""

        qconfig_dict = {"object_type": [(nn.Linear, default_dynamic_qconfig)]}

        float_model = SingleLayerLinearDynamicModel()
        float_model.eval()

        prepared_model = prepare_fx(float_model, qconfig_dict)

        prepared_float_model = copy.deepcopy(prepared_model)
        prepared_float_model.eval()

        q_model = convert_fx(prepared_model)

        expected_weight_dict_keys = {"fc1._packed_params._packed_params"}
        self.compare_and_validate_model_weights_results_fx(
            prepared_float_model, q_model, expected_weight_dict_keys)
Example #18
    def test_conv2d(self):
        class M(torch.nn.Module):
            def __init__(self):
                super(M, self).__init__()
                self.conv1 = nn.Conv2d(1, 1, 1)
                self.conv2 = nn.Conv2d(1, 1, 1)

            def forward(self, x):
                x = self.conv1(x)
                x = self.conv2(x)
                return x

        m = M().eval()
        qconfig_dict = {"": default_qconfig, "module_name": [("conv1", None)]}
        m = prepare_fx(m, qconfig_dict)
        data = torch.randn(1, 1, 1, 1)
        m(data)  # calibrate the observers before conversion
        m = convert_fx(m)
        # conv1 is not quantized (its qconfig is None); conv2 is quantized
        self._compare_script_and_mobile(m, input=data)
Example #19
    def test_compare_weights_linear_dynamic_fx(self):
        r"""Compare the weights of float and dynamic quantized linear layer"""

        qconfig = torch.quantization.qconfig.default_dynamic_qconfig
        qconfig_dict = {"": qconfig}

        float_model = SingleLayerLinearDynamicModel()
        float_model.eval()

        prepared_model = prepare_fx(float_model, qconfig_dict)

        backup_prepared_model = copy.deepcopy(prepared_model)
        backup_prepared_model.eval()

        q_model = convert_fx(prepared_model)

        expected_weight_dict_keys = {"fc1._packed_params._packed_params"}
        self.compare_and_validate_model_weights_results_fx(
            backup_prepared_model, q_model, expected_weight_dict_keys
        )
Example #20
    def test_nodes_with_equal_types_get_matched(self):
        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.conv1 = nn.Conv2d(1, 1, 1)
                self.conv2 = nn.Conv2d(1, 1, 1)

            def forward(self, x):
                x = self.conv1(x)
                x = self.conv2(x)
                x = torch.mul(x, x)
                x = torch.sigmoid(x)
                x = F.relu(x)
                return x

        m = M().eval()
        # prevent conv2 from getting quantized, so we can test
        # modules with equal types
        qconfig_dict = {
            '': torch.quantization.default_qconfig,
            'module_name': [('conv2', None)],
        }
        mp = prepare_fx(m, qconfig_dict)
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)
        results = get_matching_subgraph_pairs(mp, mq)

        # all of these should be matched
        expected_types = {
            'base_op_torch.nn.Conv2d_1':
            ((nn.Conv2d, nn.Conv2d), (nnq.Conv2d, nnq.Conv2d)),
            'base_op_torch.nn.Conv2d_0':
            ((nn.Conv2d, nn.Conv2d), (nn.Conv2d, nn.Conv2d)),
            'base_op_torch.mul_0':
            ((torch.mul, torch.mul), (toq.mul, toq.mul)),
            'base_op_torch.relu_0': ((F.relu, F.relu), (F.relu, F.relu)),
            'base_op_torch.sigmoid_0':
            ((torch.sigmoid, torch.sigmoid), (torch.sigmoid, torch.sigmoid)),
        }
        self.assert_types_for_matched_subgraph_pairs(results, expected_types,
                                                     mp, mq)
Example #21
    def test_compare_weights_conv_static_fx(self):
        r"""Compare the weights of float and static quantized conv layer"""

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        model_list = [ConvModel(), ConvBnModel(), ConvBnReLUModel()]
        for float_model in model_list:
            float_model.eval()

            fused = fuse_fx(float_model)
            prepared_model = prepare_fx(float_model, qconfig_dict)

            # Run calibration
            test_only_eval_fn(prepared_model, self.img_data_2d)
            q_model = convert_fx(prepared_model)

            expected_weight_dict_keys = {"conv.weight"}
            self.compare_and_validate_model_weights_results_fx(
                fused, q_model, expected_weight_dict_keys)
Example #22
    def test_compare_model_outputs_linear_dynamic_fx(self):
        r"""Compare the output of linear layer in dynamic quantized model and corresponding
        output of linear layer in float model
        """

        qconfig_dict = {"object_type": [(nn.Linear, default_dynamic_qconfig)]}

        float_model = SingleLayerLinearDynamicModel()
        float_model.eval()

        prepared_model = prepare_fx(float_model, qconfig_dict)
        prepared_float_model = copy.deepcopy(prepared_model)

        q_model = convert_fx(prepared_model)

        linear_data = self.calib_data[0][0]

        expected_act_compare_dict_keys = {"x.stats", "fc1.stats"}
        self.compare_and_validate_model_outputs_results_fx(
            prepared_float_model, q_model, expected_act_compare_dict_keys,
            linear_data)
Example #23
    def test_simple_fun(self):
        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.w = nn.Parameter(torch.Tensor(1, 4))
                self.b = nn.Parameter(torch.zeros(1))
                torch.nn.init.kaiming_uniform_(self.w, a=math.sqrt(5))

            def forward(self, x):
                return F.linear(x, self.w, self.b)

        m = M().eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)
        results = get_matching_subgraph_pairs(mp, mq)

        expected_types = {'linear': ((F.linear, F.linear), (toq.linear, toq.linear))}
        self.assert_types_for_matched_subgraph_pairs(results, expected_types, mp, mq)
Example #24
    def test_compare_weights_fun(self):
        class M(nn.Module):
            def __init__(self):
                super().__init__()
                self.w = nn.Parameter(torch.Tensor(4, 1))
                self.b = nn.Parameter(torch.zeros(4))
                torch.nn.init.kaiming_uniform_(self.w, a=math.sqrt(5))

            def forward(self, x):
                return F.linear(x, self.w, self.b)

        m = M().eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        mp(torch.randn(1, 1))
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)
        results = compare_weights('fp32_prepared', mp, 'int8', mq)
        self.assertTrue(len(results) == 1)
        self.assert_ns_weight_compare_dict_valid(results)
Example #25
    def test_remove_qconfig_observer_fx(self):
        r"""Remove activation_post_process node from fx prepred model"""
        float_model = SingleLayerLinearModel()
        float_model.eval()

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)

        qconfig_dict = {"": qconfig}

        prepared_model = prepare_fx(float_model, qconfig_dict)

        prepared_float_model = copy.deepcopy(prepared_model)
        prepared_float_model.eval()

        model = remove_qconfig_observer_fx(prepared_float_model)

        modules = dict(model.named_modules())
        for node in model.graph.nodes:
            if node.op == "call_module":
                self.assertFalse(is_activation_post_process(modules[node.target]))
Example #26
    def test_input_weight_equalization_activation_values(self):
        """ After applying the equalization functions check if the input
        observer's min/max values are as expected
        """

        tests = [SingleLayerLinearModel, TwoLayerLinearModel, SingleLayerFunctionalLinearModel]

        x = torch.rand((5, 5))
        torch.manual_seed(0)
        for M in tests:
            m = M().eval()
            exp_eq_scales = self.get_expected_eq_scales(m, x.detach().numpy())
            exp_weights, exp_bias = self.get_expected_weights_bias(m, x.detach().numpy(), exp_eq_scales)
            exp_inp_act_vals = self.get_expected_inp_act_vals(m, x, exp_eq_scales, exp_weights, exp_bias)
            exp_weight_act_vals = self.get_expected_weight_act_vals(exp_weights)

            prepared = prepare_fx(m, specific_qconfig_dict, equalization_qconfig_dict=default_equalization_qconfig_dict)
            prepared(x)
            convert_ref = _convert_equalization_ref(prepared)
            convert_ref(x)

            modules = dict(convert_ref.named_modules(remove_duplicate=False))
            inp_counter = 0
            weight_counter = 0
            for node in convert_ref.graph.nodes:
                if "weight" not in node.name and node.op == 'call_module' and \
                   isinstance(modules[str(node.target)], MinMaxObserver):
                    # Check min/max values of input activation layers
                    exp_min_val, exp_max_val = exp_inp_act_vals[inp_counter]
                    self.assertEqual(modules[str(node.target)].min_val, exp_min_val)
                    self.assertEqual(modules[str(node.target)].max_val, exp_max_val)
                    inp_counter += 1

                elif node.op == 'call_module' and isinstance(modules[str(node.target)], MinMaxObserver):
                    # Check min/max values of weight activation layers
                    assert("weight" in node.name)
                    exp_min_val, exp_max_val = exp_weight_act_vals[weight_counter]
                    self.assertEqual(modules[str(node.target)].min_val, exp_min_val)
                    self.assertEqual(modules[str(node.target)].max_val, exp_max_val)
                    weight_counter += 1
Example #27
    def _test_match_shadow_activations(
        self,
        m,
        data,
        prepared_expected_node_occurrence=None,
        results_len=0,
        should_log_inputs=False,
    ):
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        mp(*data)
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)

        mp_shadows_mq = add_shadow_loggers('fp32_prepared',
                                           mp,
                                           'int8',
                                           mq,
                                           OutputLogger,
                                           should_log_inputs=should_log_inputs)

        if prepared_expected_node_occurrence:
            self.checkGraphModuleNodes(
                mp_shadows_mq,
                expected_node_occurrence=prepared_expected_node_occurrence)

        # TODO(before land): test both scripted and non-scripted
        mp_shadows_mq = torch.jit.script(mp_shadows_mq)

        # calibrate
        mp_shadows_mq(*data)

        # check activation result correctness
        act_compare_dict = extract_shadow_logger_info(mp_shadows_mq,
                                                      OutputLogger)
        self.assertTrue(
            len(act_compare_dict) == results_len,
            f"expected len {results_len}, got len {len(act_compare_dict)}")
        self.assert_ns_compare_dict_valid(act_compare_dict)
Example #28
    def test_compare_weights_linear_static_fx(self):
        r"""Compare the weights of float and static quantized linear layer"""

        qengine = torch.backends.quantized.engine
        qconfig = get_default_qconfig(qengine)
        qconfig_dict = {"": qconfig}

        float_model = SingleLayerLinearModel()
        float_model.eval()

        prepared_model = prepare_fx(float_model, qconfig_dict)

        prepared_float_model = copy.deepcopy(prepared_model)
        prepared_float_model.eval()

        # Run calibration
        test_only_eval_fn(prepared_model, self.calib_data)
        q_model = convert_fx(prepared_model)

        expected_weight_dict_keys = {"fc1._packed_params._packed_params"}
        self.compare_and_validate_model_weights_results_fx(
            prepared_float_model, q_model, expected_weight_dict_keys)
Example #29
def build_int8_trt_implicit_quant(rn18):
    rn18 = copy.deepcopy(rn18)
    data = torch.randn(1, 3, 224, 224)
    # Quantization
    qconfig = torch.quantization.QConfig(
        activation=torch.quantization.observer.HistogramObserver.with_args(
            qscheme=torch.per_tensor_symmetric, reduce_range=True),
        weight=torch.quantization.default_per_channel_weight_observer)
    prepared = prepare_fx(rn18, {"": qconfig})
    for _ in range(10):
        prepared(data)
    quantized_rn18 = convert_fx(prepared, is_reference=True)

    # Build trt int8 model
    traced_rn18 = torch.fx.symbolic_trace(quantized_rn18)
    shape_prop.ShapeProp(traced_rn18).propagate(data)
    traced_rn18 = NormalizeArgs(traced_rn18).transform()
    interp = TRTInterpreter(traced_rn18, InputTensorSpec.from_tensors([data]))
    engine, input_names, output_names = interp.run(
        fp16_mode=False, int8_mode=True, strict_type_constraints=True)
    trt_mod = TRTModule(engine, input_names, output_names)
    return trt_mod
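A hedged usage sketch for the builder above, assuming torchvision, a CUDA device, and a TensorRT runtime are available (TRTModule executes on the GPU):

    import torchvision
    rn18 = torchvision.models.resnet18(pretrained=True).eval()
    trt_mod = build_int8_trt_implicit_quant(rn18)
    out = trt_mod(torch.randn(1, 3, 224, 224).cuda())  # run through the TRT engine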
Example #30
    def test_match_activations_mod(self):
        m = nn.Sequential(
            torch.quantization.QuantStub(),
            nn.Conv2d(1, 1, 1),
            nn.Conv2d(1, 1, 1),
        ).eval()
        mp = prepare_fx(m, {'': torch.quantization.default_qconfig})
        mp(torch.randn(2, 1, 2, 2))
        # TODO(future PR): prevent the need for copying here, we can copy the
        # modules but should reuse the underlying tensors
        mp_copy = copy.deepcopy(mp)
        mq = convert_fx(mp_copy)

        mp_ns, mq_ns = prepare_model_outputs('fp32_prepared', mp, 'int8', mq,
                                             OutputLogger)

        expected_occurrence = {
            ns.call_module(OutputLogger): 2,
        }
        self.checkGraphModuleNodes(
            mp_ns, expected_node_occurrence=expected_occurrence)
        self.checkGraphModuleNodes(
            mq_ns, expected_node_occurrence=expected_occurrence)

        # TODO(before land): test both scripted and non-scripted
        mp_ns = torch.jit.script(mp_ns)
        mq_ns = torch.jit.script(mq_ns)

        # calibrate
        input_fp32 = torch.randn(2, 1, 2, 2)
        mp_ns(input_fp32)
        mq_ns(input_fp32)

        # check activation result correctness
        act_compare_dict = get_matching_activations('fp32_prepared', mp_ns,
                                                    'int8', mq_ns,
                                                    OutputLogger)
        self.assertTrue(len(act_compare_dict) == 2)
        self.assert_ns_logger_act_compare_dict_valid(act_compare_dict)