def test_simple_conv(self):
    torch.backends.quantized.engine = "onednn"

    q_config_mapping = QConfigMapping()
    q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

    example_input = torch.randn(1, 3, 10, 10)
    prepared_model = self._prepare_model_and_run_input(ConvModel(), q_config_mapping, example_input)

    # run the detector
    optims_str, per_channel_info = _detect_per_channel(prepared_model)

    # no optims possible: the conv layer already supports and uses per-channel quantization
    self.assertEqual(
        optims_str,
        DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
    )

    self.assertEqual(per_channel_info["backend"], torch.backends.quantized.engine)
    self.assertEqual(len(per_channel_info["per_channel_status"]), 1)
    self.assertEqual(list(per_channel_info["per_channel_status"])[0], "conv")
    self.assertEqual(
        per_channel_info["per_channel_status"]["conv"]["per_channel_supported"],
        True,
    )
    self.assertEqual(per_channel_info["per_channel_status"]["conv"]["per_channel_used"], True)
def test_fusion_layer_in_sequential(self):
    torch.backends.quantized.engine = "fbgemm"

    q_config_mapping = QConfigMapping()
    q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

    prepared_model = self._prepare_model_and_run_input(
        FUSION_CONV_LINEAR_EXAMPLE,
        q_config_mapping,
        torch.randn(1, 3, 10, 10),
    )

    # run the detector
    optims_str, per_channel_info = _detect_per_channel(prepared_model)

    # no optims possible: fbgemm already applies per-channel quantization here
    self.assertEqual(
        optims_str,
        DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
    )

    # ensure it got into the nested layer and considered all the nested fusion components
    self.assertEqual(len(per_channel_info["per_channel_status"]), 4)

    # for each layer, per-channel should be both supported and used
    for key in per_channel_info["per_channel_status"].keys():
        module_entry = per_channel_info["per_channel_status"][key]
        self.assertEqual(module_entry["per_channel_supported"], True)
        self.assertEqual(module_entry["per_channel_used"], True)
def test_multi_linear_model_without_per_channel(self):
    torch.backends.quantized.engine = "qnnpack"

    q_config_mapping = QConfigMapping()
    q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

    prepared_model = self._prepare_model_and_run_input(
        TwoLayerLinearModel(),
        q_config_mapping,
        TwoLayerLinearModel().get_example_inputs()[0],
    )

    # run the detector
    optims_str, per_channel_info = _detect_per_channel(prepared_model)

    # there should be optims possible
    self.assertNotEqual(
        optims_str,
        DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
    )
    self.assertEqual(per_channel_info["backend"], torch.backends.quantized.engine)
    self.assertEqual(len(per_channel_info["per_channel_status"]), 2)

    # for each linear layer, per-channel should be supported but not used
    for linear_key in per_channel_info["per_channel_status"].keys():
        module_entry = per_channel_info["per_channel_status"][linear_key]
        self.assertEqual(module_entry["per_channel_supported"], True)
        self.assertEqual(module_entry["per_channel_used"], False)
def test_conv_sub_class_considered(self):
    torch.backends.quantized.engine = "qnnpack"

    q_config_mapping = QConfigMapping()
    q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

    prepared_model = self._prepare_model_and_run_input(
        LAZY_CONV_LINEAR_EXAMPLE,
        q_config_mapping,
        torch.randn(1, 3, 10, 10),
    )

    # run the detector
    optims_str, per_channel_info = _detect_per_channel(prepared_model)

    # there should be optims possible
    self.assertNotEqual(
        optims_str,
        DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
    )

    # ensure it got into the nested layer and considered the LazyConv2d
    self.assertEqual(len(per_channel_info["per_channel_status"]), 4)

    # for each layer, per-channel should be supported but not used
    for key in per_channel_info["per_channel_status"].keys():
        module_entry = per_channel_info["per_channel_status"][key]
        self.assertEqual(module_entry["per_channel_supported"], True)
        self.assertEqual(module_entry["per_channel_used"], False)
def test_sequential_model_format(self):
    with override_quantized_engine('qnnpack'):
        torch.backends.quantized.engine = "qnnpack"

        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

        prepared_model = self._prepare_model_and_run_input(
            NESTED_CONV_LINEAR_EXAMPLE,
            q_config_mapping,
            torch.randn(1, 3, 10, 10),
        )

        # run the detector
        per_channel_detector = PerChannelDetector(torch.backends.quantized.engine)
        optims_str, per_channel_info = per_channel_detector.generate_detector_report(prepared_model)

        # there should be optims possible
        self.assertNotEqual(
            optims_str,
            DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
        )

        # ensure it got into the nested layer
        self.assertEqual(len(per_channel_info["per_channel_status"]), 4)

        # for each layer, per-channel should be supported but not used
        for key in per_channel_info["per_channel_status"].keys():
            module_entry = per_channel_info["per_channel_status"][key]
            self.assertEqual(module_entry["per_channel_supported"], True)
            self.assertEqual(module_entry["per_channel_used"], False)
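# Note on the per-channel expectations in the tests above (our reading of the
# backend default qconfigs, not something the tests assert directly): the fbgemm
# and onednn default qconfigs use per-channel weight observers, so those tests
# expect per_channel_used == True, while qnnpack's default qconfig uses per-tensor
# weights, so its tests expect per_channel_used == False even though the layers
# themselves support per-channel quantization.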
def _get_qconfig_mapping(obj: Any, dict_key: str) -> Optional[QConfigMapping]:
    """
    Convert the given object into a QConfigMapping if possible, else raise an
    exception. ``None`` and existing ``QConfigMapping`` instances are returned
    unchanged; a ``Dict`` is converted via ``QConfigMapping.from_dict``.
    """
    if isinstance(obj, QConfigMapping) or obj is None:
        return obj
    if isinstance(obj, Dict):
        return QConfigMapping.from_dict(obj)
    raise ValueError(
        f"Expected QConfigMapping in prepare_custom_config_dict[\"{dict_key}\"], got '{type(obj)}'"
    )
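# A minimal usage sketch of _get_qconfig_mapping (hypothetical call sites, shown
# only to illustrate the three accepted inputs and the failure case):
#
#   _get_qconfig_mapping(QConfigMapping(), "qconfig_mapping")       # returned as-is
#   _get_qconfig_mapping({"": default_qconfig}, "qconfig_mapping")  # QConfigMapping.from_dict(...)
#   _get_qconfig_mapping(None, "qconfig_mapping")                   # None passes through
#   _get_qconfig_mapping(42, "qconfig_mapping")                     # raises ValueError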
def test_nested_detection_case(self):
    class SingleLinear(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(3, 3)

        def forward(self, x):
            x = self.linear(x)
            return x

    class TwoBlockNet(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.block1 = SingleLinear()
            self.block2 = SingleLinear()

        def forward(self, x):
            x = self.block1(x)
            y = self.block2(x)
            z = x + y
            z = F.relu(z)
            return z

    # create model, example input, and qconfig mapping
    torch.backends.quantized.engine = "fbgemm"
    model = TwoBlockNet()
    example_input = torch.randint(-10, 0, (1, 3, 3, 3))
    example_input = example_input.to(torch.float)

    q_config_mapping = QConfigMapping()
    q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig("fbgemm"))

    # prep model and select observer
    model_prep = quantize_fx.prepare_fx(model, q_config_mapping, example_input)
    obs_ctr = ModelReportObserver

    # find layer to attach to and store
    linear_fqn = "block2.linear"  # fqn of target linear

    target_linear = None
    for node in model_prep.graph.nodes:
        if node.target == linear_fqn:
            target_linear = node
            break

    # insert observers into both the module hierarchy and the graph, pre and post

    # set up to insert before target_linear (pre_observer)
    with model_prep.graph.inserting_before(target_linear):
        obs_to_insert = obs_ctr()
        pre_obs_fqn = linear_fqn + ".model_report_pre_observer"
        model_prep.add_submodule(pre_obs_fqn, obs_to_insert)
        model_prep.graph.create_node(op="call_module", target=pre_obs_fqn, args=target_linear.args)

    # set up and insert after the target_linear (post_observer)
    with model_prep.graph.inserting_after(target_linear):
        obs_to_insert = obs_ctr()
        post_obs_fqn = linear_fqn + ".model_report_post_observer"
        model_prep.add_submodule(post_obs_fqn, obs_to_insert)
        model_prep.graph.create_node(op="call_module", target=post_obs_fqn, args=(target_linear,))

    # need to recompile the module after submodules are added, then pass inputs through
    model_prep.recompile()

    num_iterations = 10
    for i in range(num_iterations):
        if i % 2 == 0:
            example_input = torch.randint(-10, 0, (1, 3, 3, 3)).to(torch.float)
        else:
            example_input = torch.randint(0, 10, (1, 3, 3, 3)).to(torch.float)
        model_prep(example_input)

    # run it through the dynamic vs static detector
    dynam_vs_stat_str, dynam_vs_stat_dict = _detect_dynamic_vs_static(model_prep, tolerance=0.5)

    # one of the stats should be stationary and the other non-stationary,
    # so dynamic quantization should be recommended
    data_dist_info = [
        dynam_vs_stat_dict[linear_fqn]["pre_observer_data_dist"],
        dynam_vs_stat_dict[linear_fqn]["post_observer_data_dist"],
    ]

    self.assertTrue("stationary" in data_dist_info)
    self.assertTrue("non-stationary" in data_dist_info)
    self.assertTrue(dynam_vs_stat_dict[linear_fqn]["dynamic_recommended"])
def test_multiple_q_config_options(self):
    torch.backends.quantized.engine = "qnnpack"

    # qconfig with support for per_channel quantization
    per_channel_qconfig = QConfig(
        activation=HistogramObserver.with_args(reduce_range=True),
        weight=default_per_channel_weight_observer,
    )

    # define a model mixing conv and linear layers
    class ConvLinearModel(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = torch.nn.Conv2d(3, 3, 2, 1)
            self.fc1 = torch.nn.Linear(9, 27)
            self.relu = torch.nn.ReLU()
            self.fc2 = torch.nn.Linear(27, 27)
            self.conv2 = torch.nn.Conv2d(3, 3, 2, 1)

        def forward(self, x):
            x = self.conv1(x)
            x = self.fc1(x)
            x = self.relu(x)
            x = self.fc2(x)
            x = self.conv2(x)
            return x

    q_config_mapping = QConfigMapping()
    q_config_mapping.set_global(
        torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine)
    ).set_object_type(torch.nn.Conv2d, per_channel_qconfig)

    prepared_model = self._prepare_model_and_run_input(
        ConvLinearModel(),
        q_config_mapping,
        torch.randn(1, 3, 10, 10),
    )

    # run the detector
    optims_str, per_channel_info = _detect_per_channel(prepared_model)

    # the only suggestions should be for the linear layers,
    # so there should be optims possible
    self.assertNotEqual(
        optims_str,
        DEFAULT_NO_OPTIMS_ANSWER_STRING.format(torch.backends.quantized.engine),
    )

    # ensure it got into the nested layer
    self.assertEqual(len(per_channel_info["per_channel_status"]), 4)

    # every layer supports per-channel, but only the conv layers use it:
    # they get per_channel_qconfig, while the linears fall back to the global qconfig
    for key in per_channel_info["per_channel_status"].keys():
        module_entry = per_channel_info["per_channel_status"][key]
        self.assertEqual(module_entry["per_channel_supported"], True)

        if "fc" in key:
            self.assertEqual(module_entry["per_channel_used"], False)
        elif "conv" in key:
            self.assertEqual(module_entry["per_channel_used"], True)
        else:
            raise ValueError("Should only contain conv and linear layers as key values")
def test_prepare_model_calibration(self):
    """
    Tests model_report.prepare_detailed_calibration, which prepares the model for calibration.

    Specifically looks at:
    - Whether observers are properly inserted into the regular nn.Module
    - Whether the targets and the arguments of the observers are proper
    - Whether the internal representation of observers of interest is updated
    """

    # example model to use for tests
    class ThreeOps(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(3, 3)
            self.bn = nn.BatchNorm2d(3)
            self.relu = nn.ReLU()

        def forward(self, x):
            x = self.linear(x)
            x = self.bn(x)
            x = self.relu(x)
            return x

    class TwoThreeOps(nn.Module):
        def __init__(self):
            super().__init__()
            self.block1 = ThreeOps()
            self.block2 = ThreeOps()

        def forward(self, x):
            x = self.block1(x)
            y = self.block2(x)
            z = x + y
            z = F.relu(z)
            return z

    with override_quantized_engine('fbgemm'):
        # create model report object with an example set of detectors
        torch.backends.quantized.engine = "fbgemm"
        backend = torch.backends.quantized.engine
        test_detector_set = {DynamicStaticDetector(), PerChannelDetector(backend)}

        # initialize the ModelReport with the detector set
        model_report = ModelReport(test_detector_set)

        # prepare the model
        model = TwoThreeOps()
        example_input = torch.randn(1, 3, 3, 3)
        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

        model_prep = quantize_fx.prepare_fx(model, q_config_mapping, example_input)

        # prepare the model for calibration
        prepared_for_calibrate_model = model_report.prepare_detailed_calibration(model_prep)

        # see whether observers are properly inserted into the regular nn.Module
        # there should be 4 observers present in this case
        modules_observer_cnt = 0
        for fqn, module in prepared_for_calibrate_model.named_modules():
            if isinstance(module, ModelReportObserver):
                modules_observer_cnt += 1
        self.assertEqual(modules_observer_cnt, 4)

        model_report_str_check = "model_report"
        # also make sure arguments for observers in the graph are proper
        for node in prepared_for_calibrate_model.graph.nodes:
            # not all node targets are strings, so check
            if isinstance(node.target, str) and model_report_str_check in node.target:
                # a pre-observer has the same args as the linear (next node)
                if "pre_observer" in node.target:
                    self.assertEqual(node.args, node.next.args)
                # a post-observer takes the target linear (previous node) as its arg
                if "post_observer" in node.target:
                    self.assertEqual(node.args, (node.prev,))

        # ensure model_report observers of interest are updated
        # there should be two entries
        self.assertEqual(len(model_report.get_observers_of_interest()), 2)

        for detector in test_detector_set:
            self.assertTrue(detector.get_detector_name() in model_report.get_observers_of_interest().keys())

            # get number of entries for this detector
            detector_obs_of_interest_fqns = model_report.get_observers_of_interest()[detector.get_detector_name()]

            # assert that the per-channel detector has 0 and the dynamic-static detector has 4
            if isinstance(detector, PerChannelDetector):
                self.assertEqual(len(detector_obs_of_interest_fqns), 0)
            elif isinstance(detector, DynamicStaticDetector):
                self.assertEqual(len(detector_obs_of_interest_fqns), 4)

        # ensure that we can prepare for calibration only once
        with self.assertRaises(ValueError):
            prepared_for_calibrate_model = model_report.prepare_detailed_calibration(model_prep)
def test_generate_report(self):
    """
    Tests model_report.generate_model_report to ensure report generation.

    Specifically looks at:
    - Whether the correct number of reports are being generated
    - Whether observers are being properly removed if specified
    - Whether report generation is correctly blocked the second time if observers were removed
    """

    with override_quantized_engine('fbgemm'):
        # set the backend for this test
        torch.backends.quantized.engine = "fbgemm"

        # check whether the correct number of reports are being generated
        filled_detector_set = {DynamicStaticDetector(), PerChannelDetector(torch.backends.quantized.engine)}
        single_detector_set = {DynamicStaticDetector()}

        # initialize one with the filled detector set
        model_report_full = ModelReport(filled_detector_set)
        # initialize another with the single detector set
        model_report_single = ModelReport(single_detector_set)

        # prepare and calibrate two different instances of the same model
        model_full = TestFxModelReportClass.TwoThreeOps()
        model_single = TestFxModelReportClass.TwoThreeOps()
        example_input = torch.randn(1, 3, 3, 3)
        q_config_mapping = QConfigMapping()
        q_config_mapping.set_global(torch.ao.quantization.get_default_qconfig(torch.backends.quantized.engine))

        model_prep_full = quantize_fx.prepare_fx(model_full, q_config_mapping, example_input)
        model_prep_single = quantize_fx.prepare_fx(model_single, q_config_mapping, example_input)

        # prepare the models for calibration
        prepared_for_calibrate_model_full = model_report_full.prepare_detailed_calibration(model_prep_full)
        prepared_for_calibrate_model_single = model_report_single.prepare_detailed_calibration(model_prep_single)

        # now calibrate the two models
        num_iterations = 10
        for i in range(num_iterations):
            example_input = torch.randint(100, (1, 3, 3, 3)).to(torch.float)
            prepared_for_calibrate_model_full(example_input)
            prepared_for_calibrate_model_single(example_input)

        # now generate the reports
        model_full_report = model_report_full.generate_model_report(
            prepared_for_calibrate_model_full, True
        )
        model_single_report = model_report_single.generate_model_report(prepared_for_calibrate_model_single, False)

        # check that sizes are appropriate
        self.assertEqual(len(model_full_report), len(filled_detector_set))
        self.assertEqual(len(model_single_report), len(single_detector_set))

        # make sure observers are properly removed for the full report, since we passed the flag
        modules_observer_cnt, graph_observer_cnt = self.get_module_and_graph_cnts(prepared_for_calibrate_model_full)
        self.assertEqual(modules_observer_cnt, 0)  # no more observer modules
        self.assertEqual(graph_observer_cnt, 0)  # no more observer nodes in the graph

        # make sure observers aren't removed for the single report, since removal was not requested
        modules_observer_cnt, graph_observer_cnt = self.get_module_and_graph_cnts(prepared_for_calibrate_model_single)
        self.assertNotEqual(modules_observer_cnt, 0)
        self.assertNotEqual(graph_observer_cnt, 0)

        # rerunning report generation should error for the full report (observers removed)
        # but not for the single report
        with self.assertRaises(Exception):
            model_full_report = model_report_full.generate_model_report(
                prepared_for_calibrate_model_full, False
            )

        # make sure we don't run into an error for the single report
        model_single_report = model_report_single.generate_model_report(prepared_for_calibrate_model_single, False)
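# For reference, the end-to-end ModelReport workflow exercised by the two tests
# above, condensed into a hedged sketch (names come straight from the tests; the
# TwoThreeOps model class is assumed to be defined as in TestFxModelReportClass):
#
#   detectors = {DynamicStaticDetector(), PerChannelDetector(torch.backends.quantized.engine)}
#   model_report = ModelReport(detectors)
#   prepared = quantize_fx.prepare_fx(TwoThreeOps(), q_config_mapping, example_input)
#   prepared = model_report.prepare_detailed_calibration(prepared)  # insert ModelReportObservers
#   for _ in range(10):
#       prepared(torch.randn(1, 3, 3, 3))                           # calibrate
#   reports = model_report.generate_model_report(prepared, True)    # True -> remove observers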