Example 1
    def test_simple_conv(self):
        torch.backends.quantized.engine = "onednn"

        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

        input = torch.randn(1, 3, 10, 10)
        prepared_model = self._prepare_model_and_run_input(ConvModel(), q_config_mapping, input)

        # run the detector
        optims_str, per_channel_info = _detect_per_channel(prepared_model)

        # no optims possible: conv already uses per-channel under onednn, so
        # per_channel_status should contain the single conv entry, supported and used
        self.assertEqual(
            optims_str,
            DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
        )
        self.assertEqual(per_channel_info["backend"], torch.backends.quantized.engine)
        self.assertEqual(len(per_channel_info["per_channel_status"]), 1)
        self.assertEqual(list(per_channel_info["per_channel_status"])[0], "conv")
        self.assertEqual(
            per_channel_info["per_channel_status"]["conv"]["per_channel_supported"],
            True,
        )
        self.assertEqual(per_channel_info["per_channel_status"]["conv"]["per_channel_used"], True)
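
These tests call a _prepare_model_and_run_input helper that is not shown here. A minimal sketch of what such a helper would presumably do, assuming it prepares the model with FX graph mode quantization (quantize_fx is imported elsewhere in this file) and runs the example input once so observers record statistics; the actual body is not in the source:

    def _prepare_model_and_run_input(self, model, q_config_mapping, input):
        # hypothetical sketch of the unshown helper: prepare the model with FX
        # graph mode quantization, then calibrate with the single example input
        model_prep = quantize_fx.prepare_fx(model, q_config_mapping, input)
        model_prep(input)
        return model_prep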
Example 2
    def test_fusion_layer_in_sequential(self):
        torch.backends.quantized.engine = "fbgemm"

        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

        prepared_model = self._prepare_model_and_run_input(
            FUSION_CONV_LINEAR_EXAMPLE,
            q_config_mapping,
            torch.randn(1, 3, 10, 10),
        )

        # run the detector
        optims_str, per_channel_info = _detect_per_channel(prepared_model)

        # no optims possible: every fused layer already uses per-channel quantization
        self.assertEqual(
            optims_str,
            DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
        )

        # ensure the detector descended into the nested layers and considered all the fusion components
        self.assertEqual(len(per_channel_info["per_channel_status"]), 4)

        # for each layer, per-channel should be both supported and used
        for key in per_channel_info["per_channel_status"].keys():
            module_entry = per_channel_info["per_channel_status"][key]
            self.assertEqual(module_entry["per_channel_supported"], True)
            self.assertEqual(module_entry["per_channel_used"], True)
Example 3
    def test_multi_linear_model_without_per_channel(self):
        torch.backends.quantized.engine = "qnnpack"

        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

        prepared_model = self._prepare_model_and_run_input(
            TwoLayerLinearModel(),
            q_config_mapping,
            TwoLayerLinearModel().get_example_inputs()[0],
        )

        # run the detector
        optims_str, per_channel_info = _detect_per_channel(prepared_model)

        # there should be optims possible
        self.assertNotEqual(
            optims_str,
            DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
        )
        self.assertEqual(per_channel_info["backend"], torch.backends.quantized.engine)
        self.assertEqual(len(per_channel_info["per_channel_status"]), 2)

        # for each linear layer, should be supported but not used
        for linear_key in per_channel_info["per_channel_status"].keys():
            module_entry = per_channel_info["per_channel_status"][linear_key]

            self.assertEqual(module_entry["per_channel_supported"], True)
            self.assertEqual(module_entry["per_channel_used"], False)
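
For reference, a sketch of the per_channel_info structure these assertions imply. The layer keys (fc1, fc2) are assumptions inferred only from the assertions above, not an authoritative schema:

# inferred shape of per_channel_info for this test (layer keys are assumptions)
per_channel_info = {
    "backend": "qnnpack",
    "per_channel_status": {
        "fc1": {"per_channel_supported": True, "per_channel_used": False},
        "fc2": {"per_channel_supported": True, "per_channel_used": False},
    },
}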
Example 4
    def test_conv_sub_class_considered(self):
        torch.backends.quantized.engine = "qnnpack"

        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

        prepared_model = self._prepare_model_and_run_input(
            LAZY_CONV_LINEAR_EXAMPLE,
            q_config_mapping,
            torch.randn(1, 3, 10, 10),
        )

        # run the detector
        optims_str, per_channel_info = _detect_per_channel(prepared_model)

        # there should be optims possible
        self.assertNotEqual(
            optims_str,
            DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
        )

        # ensure the detector descended into the nested layers and considered the LazyConv2d
        self.assertEqual(len(per_channel_info["per_channel_status"]), 4)

        # for each layer, should be supported but not used
        for key in per_channel_info["per_channel_status"].keys():
            module_entry = per_channel_info["per_channel_status"][key]

            self.assertEqual(module_entry["per_channel_supported"], True)
            self.assertEqual(module_entry["per_channel_used"], False)
Example 5
    def test_sequential_model_format(self):

        with override_quantized_engine('qnnpack'):
            torch.backends.quantized.engine = "qnnpack"

            q_config_mapping = QConfigMapping()
            q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

            prepared_model = self._prepare_model_and_run_input(
                NESTED_CONV_LINEAR_EXAMPLE,
                q_config_mapping,
                torch.randn(1, 3, 10, 10),
            )

            # run the detector
            per_channel_detector = PerChannelDetector(torch.backends.quantized.engine)
            optims_str, per_channel_info = per_channel_detector.generate_detector_report(prepared_model)

            # there should be optims possible
            self.assertNotEqual(
                optims_str,
                DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
            )

            # ensure the detector descended into the nested layers
            self.assertEqual(len(per_channel_info["per_channel_status"]), 4)

            # for each layer, should be supported but not used
            for key in per_channel_info["per_channel_status"].keys():
                module_entry = per_channel_info["per_channel_status"][key]

                self.assertEqual(module_entry["per_channel_supported"], True)
                self.assertEqual(module_entry["per_channel_used"], False)
Example 6
def _get_qconfig_mapping(obj: Any, dict_key: str) -> Optional[QConfigMapping]:
    """
    Convert the given object into a QConfigMapping if possible, else raise an exception.
    """
    if isinstance(obj, QConfigMapping) or obj is None:
        return obj
    if isinstance(obj, dict):
        return QConfigMapping.from_dict(obj)
    raise ValueError(
        f"Expected QConfigMapping in prepare_custom_config_dict[\"{dict_key}\"], got '{type(obj)}'"
    )
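
A brief usage sketch of the helper above, showing both conversion paths; the import locations are assumptions about where these names live in torch.ao.quantization:

from typing import Any, Optional

from torch.ao.quantization import QConfigMapping, get_default_qconfig

# a plain dict in the from_dict format is converted transparently
qconfig_dict = {"": get_default_qconfig("fbgemm")}
mapping = _get_qconfig_mapping(qconfig_dict, "qconfig_mapping")
assert isinstance(mapping, QConfigMapping)

# an existing QConfigMapping (or None) passes through unchanged
assert _get_qconfig_mapping(mapping, "qconfig_mapping") is mapping
assert _get_qconfig_mapping(None, "qconfig_mapping") is None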
Example 7
    def test_nested_detection_case(self):
        class SingleLinear(torch.nn.Module):
            def __init__(self):
                super(SingleLinear, self).__init__()
                self.linear = torch.nn.Linear(3, 3)

            def forward(self, x):
                x = self.linear(x)
                return x

        class TwoBlockNet(torch.nn.Module):
            def __init__(self):
                super(TwoBlockNet, self).__init__()
                self.block1 = SingleLinear()
                self.block2 = SingleLinear()

            def forward(self, x):
                x = self.block1(x)
                y = self.block2(x)
                z = x + y
                z = F.relu(z)
                return z

        # create model, example input, and qconfig mapping
        torch.backends.quantized.engine = "fbgemm"
        model = TwoBlockNet()
        example_input = torch.randint(-10, 0, (1, 3, 3, 3))
        example_input = example_input.to(torch.float)
        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig("fbgemm"))

        # prep model and select observer
        model_prep = quantize_fx.prepare_fx(model, q_config_mapping, example_input)
        obs_ctr = ModelReportObserver

        # find layer to attach to and store
        linear_fqn = "block2.linear"  # fqn of target linear

        target_linear = None
        for node in model_prep.graph.nodes:
            if node.target == linear_fqn:
                target_linear = node
                break

        # insert observers into both the module hierarchy and the graph, pre and post

        # set up to insert before target_linear (pre_observer)
        with model_prep.graph.inserting_before(target_linear):
            obs_to_insert = obs_ctr()
            pre_obs_fqn = linear_fqn + ".model_report_pre_observer"
            model_prep.add_submodule(pre_obs_fqn, obs_to_insert)
            model_prep.graph.create_node(op="call_module", target=pre_obs_fqn, args=target_linear.args)

        # set up and insert after the target_linear (post_observer)
        with model_prep.graph.inserting_after(target_linear):
            obs_to_insert = obs_ctr()
            post_obs_fqn = linear_fqn + ".model_report_post_observer"
            model_prep.add_submodule(post_obs_fqn, obs_to_insert)
            model_prep.graph.create_node(op="call_module", target=post_obs_fqn, args=(target_linear,))

        # need to recompile module after submodule added and pass input through
        model_prep.recompile()

        num_iterations = 10
        for i in range(num_iterations):
            if i % 2 == 0:
                example_input = torch.randint(-10, 0, (1, 3, 3, 3)).to(torch.float)
            else:
                example_input = torch.randint(0, 10, (1, 3, 3, 3)).to(torch.float)
            model_prep(example_input)

        # run it through the dynamic vs static detector
        dynam_vs_stat_str, dynam_vs_stat_dict = _detect_dynamic_vs_static(model_prep, tolerance=0.5)

        # one of the stats should be stationary, and the other non-stationary
        # as a result, dynamic should be recommended
        data_dist_info = [
            dynam_vs_stat_dict[linear_fqn]["pre_observer_data_dist"],
            dynam_vs_stat_dict[linear_fqn]["post_observer_data_dist"],
        ]

        self.assertTrue("stationary" in data_dist_info)
        self.assertTrue("non-stationary" in data_dist_info)
        self.assertTrue(dynam_vs_stat_dict[linear_fqn]["dynamic_recommended"])
Example 8
    def test_multiple_q_config_options(self):
        torch.backends.quantized.engine = "qnnpack"

        # qconfig with support for per_channel quantization
        per_channel_qconfig = QConfig(
            activation=HistogramObserver.with_args(reduce_range=True),
            weight=default_per_channel_weight_observer,
        )

        # we need to design the model
        class ConvLinearModel(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.conv1 = torch.nn.Conv2d(3, 3, 2, 1)
                self.fc1 = torch.nn.Linear(9, 27)
                self.relu = torch.nn.ReLU()
                self.fc2 = torch.nn.Linear(27, 27)
                self.conv2 = torch.nn.Conv2d(3, 3, 2, 1)

            def forward(self, x):
                x = self.conv1(x)
                x = self.fc1(x)
                x = self.relu(x)
                x = self.fc2(x)
                x = self.conv2(x)
                return x

        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(
            torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine)
        ).set_object_type(torch.nn.Conv2d, per_channel_qconfig)

        prepared_model = self._prepare_model_and_run_input(
            ConvLinearModel(),
            q_config_mapping,
            torch.randn(1, 3, 10, 10),
        )

        # run the detector
        optims_str, per_channel_info = _detect_per_channel(prepared_model)

        # the only suggestions should be for the linear layers

        # there should be optims possible
        self.assertNotEqual(
            optims_str,
            DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
        )

        # all four layers (two conv, two linear) should be reported
        self.assertEqual(len(per_channel_info["per_channel_status"]), 4)

        # per-channel should be supported everywhere; only the conv layers use it
        for key in per_channel_info["per_channel_status"].keys():
            module_entry = per_channel_info["per_channel_status"][key]
            self.assertEqual(module_entry["per_channel_supported"], True)

            # linear layers: False; conv2d layers: True, since they use the per-channel qconfig
            if "fc" in key:
                self.assertEqual(module_entry["per_channel_used"], False)
            elif "conv" in key:
                self.assertEqual(module_entry["per_channel_used"], True)
            else:
                raise ValueError("Should only contain conv and linear layers as key values")
Example 9
    def test_prepare_model_callibration(self):
        """
        Tests model_report.prepare_detailed_calibration that prepares the model for callibration
        Specifically looks at:
        - Whether observers are properly inserted into regular nn.Module
        - Whether the target and the arguments of the observers are proper
        - Whether the internal representation of observers of interest is updated
        """

        # example model to use for tests
        class ThreeOps(nn.Module):
            def __init__(self):
                super(ThreeOps, self).__init__()
                self.linear = nn.Linear(3, 3)
                self.bn = nn.BatchNorm2d(3)
                self.relu = nn.ReLU()

            def forward(self, x):
                x = self.linear(x)
                x = self.bn(x)
                x = self.relu(x)
                return x

        class TwoThreeOps(nn.Module):
            def __init__(self):
                super(TwoThreeOps, self).__init__()
                self.block1 = ThreeOps()
                self.block2 = ThreeOps()

            def forward(self, x):
                x = self.block1(x)
                y = self.block2(x)
                z = x + y
                z = F.relu(z)
                return z

        with override_quantized_engine('fbgemm'):
            # create model report object
            # make an example set of detectors
            torch.backends.quantized.engine = "fbgemm"
            backend = torch.backends.quantized.engine
            test_detector_set = {DynamicStaticDetector(), PerChannelDetector(backend)}
            # initialize the ModelReport with the detector set
            model_report = ModelReport(test_detector_set)

            # prepare the model
            model = TwoThreeOps()
            example_input = torch.randn(1, 3, 3, 3)
            current_backend = torch.backends.quantized.engine
            q_config_mapping = QConfigMapping()
            q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

            model_prep = quantize_fx.prepare_fx(model, q_config_mapping, example_input)

            # prepare the model for calibration
            prepared_for_callibrate_model = model_report.prepare_detailed_calibration(model_prep)

            # check whether observers were properly inserted into the regular nn.Module
            # there should be 4 observers present in this case
            modules_observer_cnt = 0
            for fqn, module in prepared_for_callibrate_model.named_modules():
                if isinstance(module, ModelReportObserver):
                    modules_observer_cnt += 1

            self.assertEqual(modules_observer_cnt, 4)

            model_report_str_check = "model_report"
            # also make sure arguments for observers in the graph are proper
            for node in prepared_for_callibrate_model.graph.nodes:
                # not all node targets are strings, so check
                if isinstance(node.target, str) and model_report_str_check in node.target:
                    # a pre-observer has the same args as the following node (the target linear)
                    if "pre_observer" in node.target:
                        self.assertEqual(node.args, node.next.args)
                    # if post-observer, args are the target linear (previous node)
                    if "post_observer" in node.target:
                        self.assertEqual(node.args, (node.prev,))

            # ensure the model_report record of observers of interest was updated
            # there should be two entries
            self.assertEqual(len(model_report.get_observers_of_interest()), 2)
            for detector in test_detector_set:
                self.assertTrue(detector.get_detector_name() in model_report.get_observers_of_interest().keys())

                # get number of entries for this detector
                detector_obs_of_interest_fqns = model_report.get_observers_of_interest()[detector.get_detector_name()]

                # assert that the per channel detector has 0 and the dynamic static has 4
                if isinstance(detector, PerChannelDetector):
                    self.assertEqual(len(detector_obs_of_interest_fqns), 0)
                elif isinstance(detector, DynamicStaticDetector):
                    self.assertEqual(len(detector_obs_of_interest_fqns), 4)

            # ensure that we can prepare for calibration only once
            with self.assertRaises(ValueError):
                prepared_for_callibrate_model = model_report.prepare_detailed_calibration(model_prep)
Example 10
    def test_generate_report(self):
        """
            Tests model_report.generate_model_report to ensure report generation
            Specifically looks at:
            - Whether correct number of reports are being generated
            - Whether observers are being properly removed if specified
            - Whether correct blocking from generating report twice if obs removed
        """

        with override_quantized_engine('fbgemm'):
            # set the backend for this test
            torch.backends.quantized.engine = "fbgemm"

            # check whether the correct number of reports are being generated
            filled_detector_set = {DynamicStaticDetector(), PerChannelDetector(torch.backends.quantized.engine)}
            single_detector_set = {DynamicStaticDetector()}

            # initialize one with filled detector
            model_report_full = ModelReport(filled_detector_set)
            # initialize another with a single detector set
            model_report_single = ModelReport(single_detector_set)

            # prepare and calibrate two different instances of the same model
            model_full = TestFxModelReportClass.TwoThreeOps()
            model_single = TestFxModelReportClass.TwoThreeOps()
            example_input = torch.randn(1, 3, 3, 3)
            current_backend = torch.backends.quantized.engine
            q_config_mapping = QConfigMapping()
            q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

            model_prep_full = quantize_fx.prepare_fx(model_full, q_config_mapping, example_input)
            model_prep_single = quantize_fx.prepare_fx(model_single, q_config_mapping, example_input)

            # prepare the models for calibration
            prepared_for_callibrate_model_full = model_report_full.prepare_detailed_calibration(model_prep_full)
            prepared_for_callibrate_model_single = model_report_single.prepare_detailed_calibration(model_prep_single)

            # now calibrate the two models
            num_iterations = 10
            for i in range(num_iterations):
                example_input = torch.randint(100, (1, 3, 3, 3)).to(torch.float)
                prepared_for_callibrate_model_full(example_input)
                prepared_for_callibrate_model_single(example_input)

            # now generate the reports
            model_full_report = model_report_full.generate_model_report(
                prepared_for_callibrate_model_full, True
            )
            model_single_report = model_report_single.generate_model_report(prepared_for_callibrate_model_single, False)

            # check that sizes are appropriate
            self.assertEqual(len(model_full_report), len(filled_detector_set))
            self.assertEqual(len(model_single_report), len(single_detector_set))

            # make sure observers were removed for the full report since the removal flag was set
            modules_observer_cnt, graph_observer_cnt = self.get_module_and_graph_cnts(prepared_for_callibrate_model_full)
            self.assertEqual(modules_observer_cnt, 0)  # assert no more observer modules
            self.assertEqual(graph_observer_cnt, 0)  # assert no more observer nodes in graph

            # make sure observers aren't being removed for single report since not specified
            modules_observer_cnt, graph_observer_cnt = self.get_module_and_graph_cnts(prepared_for_callibrate_model_single)
            self.assertNotEqual(modules_observer_cnt, 0)
            self.assertNotEqual(graph_observer_cnt, 0)

            # rerunning report generation should error for the full report (observers removed) but not the single one
            with self.assertRaises(Exception):
                model_full_report = model_report_full.generate_model_report(
                    prepared_for_callibrate_model_full, False
                )

            # the single report can still be generated without error
            model_single_report = model_report_single.generate_model_report(prepared_for_callibrate_model_single, False)
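
Both observer-count checks above rely on a get_module_and_graph_cnts helper that is not shown. A plausible sketch, assuming it counts ModelReportObserver submodules and the call_module graph nodes that target them; the real helper's body is not in the source:

    def get_module_and_graph_cnts(self, model):
        # hypothetical sketch of the unshown helper: count ModelReportObserver
        # instances among submodules, and call_module graph nodes targeting them
        modules_observer_cnt = sum(
            1 for _, m in model.named_modules() if isinstance(m, ModelReportObserver)
        )
        graph_observer_cnt = sum(
            1
            for node in model.graph.nodes
            if node.op == "call_module"
            and isinstance(model.get_submodule(str(node.target)), ModelReportObserver)
        )
        return modules_observer_cnt, graph_observer_cnt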