def test_quantize_inputs__model_input_nodes():
    model = QuantizeInputsTestModel()
    config = get_quantization_config_without_range_init()
    config["input_info"] = [
        {"sample_size": [2, 3, 32, 32]},
        {"sample_size": [2, 3, 32, 32]},
        {"sample_size": [2, 3, 32, 32]},
        {"sample_size": [2, 3, 32, 32]},
        {"sample_size": [2, 3, 32, 32]},
    ]

    model, _ = create_compressed_model_and_algo_for_test(model, config)

    REF_QUANTIZED_INPUT_MODULE_SCOPES = [
        '/nncf_model_input_0',
        '/nncf_model_input_1',
        '/nncf_model_input_2',
        '/nncf_model_input_3',
        '/nncf_model_input_4'
    ]

    actual_input_quantizer_str_scopes = [str_scope for str_scope in model.activation_quantizers
                                         if 'nncf_model_input' in str_scope]
    assert len(REF_QUANTIZED_INPUT_MODULE_SCOPES) == len(actual_input_quantizer_str_scopes)

    for ref_qinput_scope_str in REF_QUANTIZED_INPUT_MODULE_SCOPES:
        assert isinstance(model.activation_quantizers[ref_qinput_scope_str], SymmetricQuantizer)
def create_autoq_test_config(batch_size=10, image_size=10, num_channels=3, num_init_samples=1):
    config = get_quantization_config_without_range_init()
    config['input_info'] = {
        "sample_size": [batch_size, num_channels, image_size, image_size],
    }
    config['batch_size'] = batch_size
    config['compression'].update({
        'initializer': {
            'precision': {
                "type": "autoq",
                "bits": [2, 4, 8],
                "iter_number": 2,
                "compression_ratio": 0.15,
                "eval_subset_ratio": 1.0,
                "warmup_iter_number": 1
            },
            'range': {
                'num_init_samples': num_init_samples
            },
            'batchnorm_adaptation': {
                'num_bn_adaptation_samples': 0,
                'num_bn_forget_samples': 0
            }
        }
    })
    return config
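# Illustrative usage of the helper above (a sketch, not part of the original suite):
# the returned config is normally passed to create_compressed_model_and_algo_for_test
# together with a model, after registering the data loader and criterion that the AutoQ
# precision initializer needs. register_default_init_args lives in nncf.initialization,
# but its exact signature varies between NNCF versions, so treat this as an assumption:
#
#     config = create_autoq_test_config(batch_size=2, image_size=10)
#     config = register_default_init_args(config, criterion, train_loader)  # assumed signature
#     model, ctrl = create_compressed_model_and_algo_for_test(model, config)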
def test_can_load_quant_algo__with_defaults():
    model = BasicConvTestModel()
    config = get_quantization_config_without_range_init()
    compression_algo_builder_list = create_compression_algorithm_builders(config)
    assert len(compression_algo_builder_list) == 1
    assert isinstance(compression_algo_builder_list[0], QuantizationBuilder)

    quant_model, _ = create_compressed_model_and_algo_for_test(deepcopy(model), config)

    model_conv = get_all_modules_by_type(model, 'Conv2d')
    quant_model_conv = get_all_modules_by_type(quant_model.get_nncf_wrapped_model(), 'NNCFConv2d')
    assert len(model_conv) == len(quant_model_conv)

    for module_scope, _ in model_conv.items():
        quant_scope = deepcopy(module_scope)  # type: Scope
        quant_scope.pop()
        quant_scope.push(ScopeElement('NNCFConv2d', 'conv'))
        assert quant_scope in quant_model_conv.keys()

        store = []
        for op in quant_model_conv[quant_scope].pre_ops.values():
            if isinstance(op, (UpdateInputs, UpdateWeight)) and isinstance(op.operand, SymmetricQuantizer):
                assert op.__class__.__name__ not in store
                store.append(op.__class__.__name__)
        assert UpdateWeight.__name__ in store
def create_hawq_test_config(batch_size=10, num_data_points=100, image_size=10):
    config = get_quantization_config_without_range_init()
    config['input_info'] = {
        "sample_size": [batch_size, 3, image_size, image_size],
    }
    config['batch_size'] = batch_size
    config['compression'].update({
        'initializer': {
            'precision': {
                "type": "hawq",
                "bits": [4, 8, 6],
                "num_data_points": num_data_points,
                "iter_number": 1,
                "tolerance": 1e-2
            },
            'range': {
                'num_init_samples': 1
            },
            'batchnorm_adaptation': {
                'num_bn_adaptation_samples': 0,
                'num_bn_forget_samples': 0
            }
        }
    })
    return config
def test_quantization_configs__with_precisions_list__pattern_based():
    model = ModelForTest()

    config = get_quantization_config_without_range_init()
    config['compression']['initializer'].update({
        "precision": {
            "bitwidth_per_scope": [[2, 'ModelForTest/NNCFConv2d[conv1]'],
                                   [4, 'ModelForTest/NNCFConv2d[conv2]']]
        }
    })
    config['compression']["activations"] = {"bits": 6}
    config['quantizer_setup_type'] = 'pattern_based'
    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    ref_bits = [('ModelForTest/NNCFConv2d[conv1]module_weight', 2),
                ('ModelForTest/NNCFConv2d[conv2]module_weight', 4),
                ('ModelForTest/NNCFConv2d[conv2]/conv2d_0|OUTPUT', 6),
                ('ModelForTest/NNCFConv2d[conv1]/conv2d_0|OUTPUT', 6),
                ('/nncf_model_input_0|OUTPUT', 6)]

    for key, quantizer in compression_ctrl.all_quantizations.items():
        expected_bit = [ref_bit for (name, ref_bit) in ref_bits if name == str(key)][0]
        assert quantizer.num_bits == expected_bit, 'Unexpected number of bits for {}'.format(key)

    ref_rows = [['2', '20', '0', '20'],
                ['4', '20', '0', '20'],
                ['6', '0', '60', '60']]
    table = compression_ctrl.non_stable_metric_collectors[0].get_bits_stat()
    # pylint: disable=protected-access
    assert table._rows == ref_rows
def test_can_quantize_free_operators(mocker):
    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.weight = nn.Parameter(torch.ones([1]))
            self.bias = nn.Parameter(torch.ones([1]))

        def forward(self, x):
            return F.linear(x, self.weight, self.bias)

    mod = Model()
    config = get_quantization_config_without_range_init(model_size=1)
    config["compression"].update({"quantize_inputs": False})

    quant_model, _ = create_compressed_model_and_algo_for_test(mod, config)

    quantizer_list = quant_model.get_compression_modules_by_type(CompressionModuleType.FUNCTION_QUANTIZER).values()
    assert len(quantizer_list) == 2
    for quantizer in quantizer_list:
        mocker.spy(quantizer, 'quantize')

    quant_model.do_dummy_forward()
    for quantizer in quantizer_list:
        assert quantizer.quantize.call_count == 1
def test_can_create_quant_loss_and_scheduler():
    config = get_quantization_config_without_range_init()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(MockModel(), config)

    loss = compression_ctrl.loss
    assert isinstance(loss, CompressionLoss)

    scheduler = compression_ctrl.scheduler
    assert isinstance(scheduler, CompressionScheduler)
def test_get_weight_activation_pairs__with_double_weights_per_activation():
    model_cls = DoubleWeightsPerActivation
    model_name = model_cls.__name__
    config = get_quantization_config_without_range_init()
    _, algo = create_compressed_model_and_algo_for_test(model_cls(), config)

    actual_pairs = algo.get_weights_activation_quantizers_pairs()
    ref_pair_names = [(['NNCFConv2d[conv1]module_weight', 'NNCFConv2d[conv2]module_weight'],
                       'ReLU[relu]/RELU_0')]

    compare_weights_activation_quantizers_pairs(actual_pairs, algo, ref_pair_names, model_name)
def test_get_weight_activation_pairs():
    model_cls = TwoConvTestModel
    config = get_quantization_config_without_range_init()
    _, algo = create_compressed_model_and_algo_for_test(model_cls(), config)

    actual_pairs = algo.get_weights_activation_quantizers_pairs()
    ref_pair_names = [(['Sequential[features]/Sequential[0]/NNCFConv2d[0]module_weight'],
                       'Sequential[features]/Sequential[0]/NNCFConv2d[0]module_input'),
                      (['Sequential[features]/Sequential[1]/NNCFConv2d[0]module_weight'],
                       'Sequential[features]/Sequential[0]/NNCFConv2d[0]/conv2d_0')]

    compare_weights_activation_quantizers_pairs(actual_pairs, algo, ref_pair_names, model_cls.__name__)
def get_model_and_ctrl_with_applied_hw_config_quantization(model: torch.nn.Module,
                                                           hw_config_dict: dict,
                                                           should_be_quantize_inputs: bool = True):
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config["compression"].update({"quantize_inputs": should_be_quantize_inputs})
    nncf_config["hw_config_type"] = "mock"

    net = NNCFNetwork(model, input_infos=[ModelInputInfo([1, 2, 1, 1])])
    hw_config = HWConfig.from_dict(hw_config_dict)
    qbuilder = QuantizationBuilder(nncf_config["compression"], should_init=False)
    qbuilder.quantizer_setup_type = QuantizerSetupType.PROPAGATION_BASED
    qbuilder.hw_config = hw_config
    net = qbuilder.apply_to(net)
    ctrl = net.commit_compression_changes()
    return net, ctrl
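# For reference, a minimal mock hw_config_dict that could be fed to the helper above.
# This mirrors the structure of NNCF's bundled hw_configs/*.json files, but the exact
# schema is version-dependent, so treat the keys below as an assumption:
#
#     MOCK_HW_CONFIG_DICT = {
#         "target_device": "mock",
#         "config": {
#             "quantization": {
#                 "q8_sym_tensor": {"bits": 8, "mode": "symmetric", "granularity": "pertensor"}
#             }
#         },
#         "operations": [
#             {"type": "Convolution",
#              "quantization": {"activations": "q8_sym_tensor", "weights": "q8_sym_tensor"}}
#         ]
#     }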
def test_staged_quantization_saves_enabled_quantizers_in_state_dict(tmp_path):
    config = get_quantization_config_without_range_init()
    config["compression"]["params"] = {
        "activations_quant_start_epoch": 2,
        "weights_quant_start_epoch": 1
    }
    model_save, ctrl_save = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config)
    ctrl_save.scheduler.epoch_step()

    _, ctrl_load = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config,
                                                             resuming_state_dict=model_save.state_dict())

    for quantizer_info in ctrl_load.non_weight_quantizers.values():
        assert not quantizer_info.quantizer_module_ref.is_enabled_quantization()
    for quantizer in ctrl_load.weight_quantizers.values():
        assert quantizer.is_enabled_quantization()
def test_get_weight_activation_pairs__with_extra_module():
    model_cls = DoubleWeightsPerActivationWithExtraModule
    model_name = model_cls.__name__
    config = get_quantization_config_without_range_init()
    config["compression"].update({
        "quantizable_subgraph_patterns": [["sigmoid", "conv2d"]],
        "quantize_inputs": False
    })
    _, algo = create_compressed_model_and_algo_for_test(model_cls(), config)

    actual_pairs = algo.get_weights_activation_quantizers_pairs()
    ref_pair_names = [(['NNCFConv2d[conv1]module_weight', 'NNCFConv2d[conv2]module_weight'],
                       'ReLU[relu]/RELU_0')]

    compare_weights_activation_quantizers_pairs(actual_pairs, algo, ref_pair_names, model_name)
def test_quantization_configs__with_defaults():
    model = BasicConvTestModel()
    config = get_quantization_config_without_range_init()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    assert isinstance(compression_ctrl, QuantizationController)
    weight_quantizers = compression_ctrl.weight_quantizers
    activation_quantizers = compression_ctrl.non_weight_quantizers

    ref_weight_qconfig = QuantizerConfig(bits=8,
                                         mode=QuantizationMode.SYMMETRIC,
                                         signedness_to_force=None,
                                         per_channel=False,
                                         input_shape=None,
                                         is_weights=True)
    for wq in weight_quantizers.values():
        compare_qconfigs(ref_weight_qconfig, wq)

    ref_activation_qconfig = QuantizerConfig(bits=8,
                                             mode=QuantizationMode.SYMMETRIC,
                                             signedness_to_force=None,
                                             per_channel=False,
                                             input_shape=None,
                                             is_weights=False)
    for aq in activation_quantizers.values():
        compare_qconfigs(ref_activation_qconfig, aq)
def test_load_state_sets_initialized_flag():
    config = get_quantization_config_without_range_init()

    model = TwoConvTestModel()
    quant_model, _ = create_compressed_model_and_algo_for_test(model, config)

    load_state(quant_model, {
        'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([1.0]),  # quantizer of 1st conv's weights
        'module.features.1.0.pre_ops.0.op.scale': torch.tensor([1.0])  # quantizer of 2nd conv's weights
    })

    quantizers = get_all_modules_by_type(quant_model, 'SymmetricQuantizer')
    for scope, module in quantizers.items():
        if 'activation_quantizers' in str(scope) or 'UpdateInputs' in str(scope):
            assert not module.initialized
        else:
            assert module.initialized
def disable_quantizer_gradients():
    config = get_quantization_config_without_range_init()
    config['input_info'] = {
        "sample_size": [1, 3, 10, 10],
    }
    model = MobileNetV2(num_classes=10)
    model.eval()
    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    original_requires_grad_per_param = get_requires_grad_per_param(model)
    quantization_types = [class_type.__name__ for class_type in QUANTIZATION_MODULES.registry_dict.values()]
    all_quantizations = get_all_modules_by_type(model, quantization_types)
    quantizers_switcher = QuantizersSwitcher(list(all_quantizations.values()))
    disabled_parameters = HAWQPrecisionInitializer.disable_all_gradients_except_weights_of_quantized_modules(
        quantizers_switcher,
        compression_ctrl.quantized_weight_modules_registry,
        model,
        get_scopes_of_skipped_weight_quantizers())
    return quantizers_switcher, disabled_parameters, model, original_requires_grad_per_param
def test_quantization_configs__custom():
    model = BasicConvTestModel()

    config = get_quantization_config_without_range_init()
    config['compression'].update({
        "weights": {
            "mode": "asymmetric",
            "per_channel": True,
            "bits": 4
        },
        "activations": {
            "mode": "asymmetric",
            "bits": 4,
            "signed": True,
        },
    })
    config['target_device'] = 'NONE'
    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    assert isinstance(compression_ctrl, QuantizationController)
    weight_quantizers = compression_ctrl.weight_quantizers
    activation_quantizer_infos = compression_ctrl.non_weight_quantizers

    ref_weight_qconfig = QuantizerConfig(bits=4,
                                         mode=QuantizationMode.ASYMMETRIC,
                                         signedness_to_force=None,
                                         per_channel=True,
                                         input_shape=None,
                                         is_weights=True)
    for wq in weight_quantizers.values():
        compare_qconfigs(ref_weight_qconfig, wq)

    ref_activation_qconfig = QuantizerConfig(bits=4,
                                             mode=QuantizationMode.ASYMMETRIC,
                                             signedness_to_force=True,
                                             per_channel=False,
                                             input_shape=None,
                                             is_weights=False)
    for aq_info in activation_quantizer_infos.values():
        compare_qconfigs(ref_activation_qconfig, aq_info.quantizer_module_ref)
def test_unified_scales_for_vpu():
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config["compression"]["quantize_outputs"] = True
    nncf_config["input_info"] = [
        {"sample_size": [1, 1, 1, 1]},
        {"sample_size": [1, 1, 1, 1]},
    ]
    nncf_config["target_device"] = "VPU"

    _, compression_ctrl = create_compressed_model_and_algo_for_test(QuantizerLinkingTestModel(), nncf_config)

    assert len(compression_ctrl.non_weight_quantizers) == 2

    total_quantizations = sum(len(info.affected_insertions)
                              for info in compression_ctrl.non_weight_quantizers.values())
    assert total_quantizations == 8
def test_quantize_inputs():
    model = QuantizeInputsTestModel()
    config = get_quantization_config_without_range_init()
    config["input_info"] = [
        {"sample_size": [2, 3, 32, 32]},
        {"sample_size": [2, 3, 32, 32]},
        {"sample_size": [2, 3, 32, 32]},
        {"sample_size": [2, 3, 32, 32]},
        {"sample_size": [2, 3, 32, 32]},
    ]

    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    REF_QUANTIZED_INPUT_MODULE_SCOPES = [
        "QuantizeInputsTestModel/NNCFConv2d[conv1]",
        "QuantizeInputsTestModel/NNCFConv2d[conv2]",
        "QuantizeInputsTestModel/NNCFConv2d[conv5]",
        "QuantizeInputsTestModel/NNCFConv2d[conv6]",
    ]

    for ref_qinput_module_scope_str in REF_QUANTIZED_INPUT_MODULE_SCOPES:
        scope = Scope.from_str(ref_qinput_module_scope_str)
        assert model.get_module_by_scope(scope) is not None
        assert ref_qinput_module_scope_str in compression_ctrl.quantized_inputs_modules_registry

    nncf_modules_dict = model.get_nncf_modules()
    for scope, nncf_module in nncf_modules_dict.items():
        scope_str = str(scope)
        update_inputs_count = sum(1 for pre_op in nncf_module.pre_ops.values() if isinstance(pre_op, UpdateInputs))
        if scope_str in REF_QUANTIZED_INPUT_MODULE_SCOPES:
            assert update_inputs_count == 1
        else:
            assert update_inputs_count == 0
def test_quantize_has_proper_is_weights_flag():
    class Model(nn.Module):
        def __init__(self, size=1):
            super().__init__()
            self.size = size
            self.conv = nn.Conv2d(size, size, size)

        def forward(self, x):
            return self.conv(x)

    model = Model()
    config = get_quantization_config_without_range_init(model_size=2)
    quant_model, _ = create_compressed_model_and_algo_for_test(model, config)

    for module in quant_model.modules():
        if isinstance(module, NNCFConv2d):
            for op in module.pre_ops.values():
                assert isinstance(op, (UpdateWeight, UpdateInputs))
                assert op.operand.is_weights == isinstance(op, UpdateWeight)
    for _, aq in quant_model.get_compression_modules_by_type(CompressionModuleType.ACTIVATION_QUANTIZER).items():
        assert aq.is_weights is False
def test_quantization_configs__with_precisions_list():
    class ModelForTest(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = create_conv(1, 2, 2, -1, -2)
            self.conv2 = create_conv(1, 2, 2, -1, -2)

        def forward(self, x):
            return self.conv1(x) + self.conv2(x)

    model = ModelForTest()

    config = get_quantization_config_without_range_init()
    config['compression']['initializer'].update({
        "precision": {
            "bitwidth_per_scope": [[2, 'ModelForTest/NNCFConv2d[conv1]'],
                                   [4, 'ModelForTest/NNCFConv2d[conv2]']]
        }
    })
    config['compression']["activations"] = {"bits": 6}
    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    ref_bits = [('ModelForTest/NNCFConv2d[conv1]module_weight', 2),
                ('ModelForTest/NNCFConv2d[conv2]module_weight', 4),
                ('ModelForTest/NNCFConv2d[conv2]/conv2d_0', 6),
                ('ModelForTest/NNCFConv2d[conv1]/conv2d_0', 6),
                ('ModelForTest/NNCFConv2d[conv1]module_input', 2),
                ('ModelForTest/NNCFConv2d[conv2]module_input', 4)]

    for key, quantizer in compression_ctrl.all_quantizations.items():
        expected_bit = [ref_bit for (name, ref_bit) in ref_bits if name == str(key)][0]
        assert quantizer.num_bits == expected_bit, 'Unexpected number of bits for {}'.format(key)

    ref_rows = [['2', '16.667', '16.667', '33.333'],
                ['4', '16.667', '16.667', '33.333'],
                ['6', '0', '33.333', '33.333']]
    table = compression_ctrl.get_bit_stats()
    # pylint: disable=protected-access
    assert table._rows == ref_rows
def get_basic_asym_quantization_config(model_size=4):
    config = get_quantization_config_without_range_init(model_size)
    config['compression']['activations'] = {"mode": "asymmetric"}
    config['compression']['initializer']['range'] = {"num_init_steps": 0}
    return config
def get_quantization_config_with_ignored_scope():
    config = get_quantization_config_without_range_init()
    config['compression']['ignored_scopes'] = 'ConvLinear/NNCFLinear[fc]'
    return config
def test_quantizer_scale_linking():
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config['quantizer_setup_type'] = 'pattern_based'
    nncf_config["compression"]["quantize_outputs"] = True
    nncf_config["compression"]["quantize_inputs"] = False
    nncf_config["input_info"] = [
        {"sample_size": [1, 1, 1, 1]},
        {"sample_size": [1, 1, 1, 1]},
    ]
    nncf_config["compression"]["activations"] = {
        "linked_quantizer_scopes": [
            [
                # Note: assuming that quantizers are attached as a post-op to the specified operation
                "QuantizerLinkingTestModel/Path[path2]/__mul___0",
                "QuantizerLinkingTestModel/Path[path2]/__add___0",
            ]
        ],
        "ignored_scopes": [
            # Ignore path output averaging operations
            "QuantizerLinkingTestModel/__add___0",
            "QuantizerLinkingTestModel/__add___1",
            "QuantizerLinkingTestModel/__add___2",
        ]
    }

    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        QuantizerLinkingTestModel(), nncf_config)

    # 2 paths x 3 quantizers - 1, because two quantizers in one path are shared
    assert len(compression_ctrl.non_weight_quantizers) == 5

    test_input1 = torch.ones([1, 1, 1, 1])
    test_input2 = 2 * test_input1

    non_shared_mul_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str("QuantizerLinkingTestModel/Path[path1]/__mul___0"))
    non_shared_add_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str("QuantizerLinkingTestModel/Path[path1]/__add___0"))
    shared_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str("QuantizerLinkingTestModel/Path[path2]/__add___0"))

    non_shared_mul_quantizer = compression_ctrl.non_weight_quantizers[non_shared_mul_quantizer_id].quantizer_module_ref
    non_shared_add_quantizer = compression_ctrl.non_weight_quantizers[non_shared_add_quantizer_id].quantizer_module_ref
    shared_quantizer = compression_ctrl.non_weight_quantizers[shared_quantizer_id].quantizer_module_ref

    old_scale = 765.0  # so that the quantum is equal to 3 (765 / 255 for an 8-bit quantizer)
    with torch.no_grad():
        for quantizer in compression_ctrl.all_quantizations.values():
            quantizer.scale.fill_(old_scale)

    # Expected outputs without compression - 6, 12, 18.
    # The scale is deliberately set so that these values are preserved after quantization.
    uncompressed_expected_outputs = (6.0 * torch.ones([1]), 12.0 * torch.ones([1]), 18.0 * torch.ones([1]))
    outputs_with_shared_scale_1 = compressed_model(test_input1, test_input2)

    for uncomp_out, comp_out_1 in zip(uncompressed_expected_outputs, outputs_with_shared_scale_1):
        assert torch.allclose(uncomp_out, comp_out_1)

    # Specifically clip the shared quantizer's outputs by setting its scale to 1.0
    new_shared_scale = 1.0
    with torch.no_grad():
        shared_quantizer.scale.fill_(new_shared_scale)
    outputs_with_shared_scale_2 = compressed_model(test_input1, test_input2)

    # __add___0 outputs
    assert torch.allclose(outputs_with_shared_scale_2[0], 4.0 * torch.ones([1]))
    # __mul___0 outputs
    assert torch.allclose(outputs_with_shared_scale_2[1], 7.0 * torch.ones([1]))
    # __add___1 outputs
    assert torch.allclose(outputs_with_shared_scale_2[2], 12.0 * torch.ones([1]))

    # Clipping the non-shared quantizers at the same position in the path as the two shared ones
    # in the same manner is required to simulate the same grad input for both the shared quantizers
    # and the unshared ones
    with torch.no_grad():
        non_shared_mul_quantizer.scale.fill_(new_shared_scale)
        non_shared_add_quantizer.scale.fill_(new_shared_scale)

    final_output = compressed_model(test_input1, test_input2)[2]
    final_output.backward()

    assert torch.allclose(shared_quantizer.scale.grad,
                          non_shared_mul_quantizer.scale.grad + non_shared_add_quantizer.scale.grad)
def test_unified_scales_are_identical_in_onnx(tmp_path):
    # pylint:disable=no-member
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config["compression"]["quantize_outputs"] = True
    nncf_config["input_info"] = [
        {"sample_size": [1, 1, 1, 2]},
    ]
    nncf_config["target_device"] = "VPU"

    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        SimplerModelForUnifiedScalesTesting(), nncf_config)

    with torch.no_grad():
        for quant_info in compression_ctrl.non_weight_quantizers.values():
            if isinstance(quant_info.quantizer_module_ref, AsymmetricQuantizer):
                quant_info.quantizer_module_ref.input_range *= torch.abs(
                    torch.rand_like(quant_info.quantizer_module_ref.input_range))
            else:
                quant_info.quantizer_module_ref.scale *= torch.abs(
                    torch.rand_like(quant_info.quantizer_module_ref.scale))

    test_input1 = torch.ones([1, 1, 1, 2])
    compressed_model.forward(test_input1)

    onnx_path = tmp_path / "model.onnx"
    compression_ctrl.export_model(onnx_path)

    onnx_model = onnx.load(onnx_path)

    def get_fq_nodes(onnx_model: onnx.ModelProto) -> List[onnx.NodeProto]:
        retval = []
        for node in onnx_model.graph.node:
            if str(node.op_type) == "FakeQuantize":
                retval.append(node)
        return retval

    def immediately_dominates_add_or_mul(node: onnx.NodeProto, graph: onnx.GraphProto) -> bool:
        if len(node.output) != 1:
            return False
        output_tensor_id = node.output[0]
        matches = [x for x in graph.node if output_tensor_id in x.input]
        for match in matches:
            if match.op_type in ["Add", "Mul"]:
                return True
        return False

    def get_successor(node: onnx.NodeProto, graph: onnx.GraphProto) -> onnx.NodeProto:
        assert len(node.output) == 1  # only single-output nodes are supported by this helper
        for target_node in graph.node:
            if node.output[0] in target_node.input:
                return target_node
        return None

    def group_nodes_by_output_target(nodes: List[onnx.NodeProto],
                                     graph: onnx.GraphProto) -> List[List[onnx.NodeProto]]:
        output_nodes = {}  # type: Dict[str, List[onnx.NodeProto]]
        for node in nodes:
            target_node_name = get_successor(node, graph).name
            if target_node_name not in output_nodes:
                output_nodes[target_node_name] = []
            output_nodes[target_node_name].append(node)
        return list(output_nodes.values())

    def resolve_constant_node_inputs_to_values(node: onnx.NodeProto,
                                               graph: onnx.GraphProto) -> Dict[str, onnx.AttributeProto]:
        retval = {}
        for input_ in node.input:
            constant_input_nodes = [x for x in graph.node if input_ in x.output and x.op_type == "Constant"]
            for constant_input_node in constant_input_nodes:
                assert len(constant_input_node.attribute) == 1
                val = constant_input_node.attribute[0]
                retval[input_] = numpy_helper.to_array(val.t)
        return retval

    fq_nodes = get_fq_nodes(onnx_model)
    eltwise_predicate = partial(immediately_dominates_add_or_mul, graph=onnx_model.graph)
    eltwise_fq_nodes = list(filter(eltwise_predicate, fq_nodes))
    fq_nodes_grouped_by_output = group_nodes_by_output_target(eltwise_fq_nodes, onnx_model.graph)

    for unified_scale_group in fq_nodes_grouped_by_output:
        inputs = [resolve_constant_node_inputs_to_values(fq_node, onnx_model.graph)
                  for fq_node in unified_scale_group]
        for inputs_dict in inputs[1:]:
            curr_values = list(inputs_dict.values())
            ref_values = list(inputs[0].values())
            assert curr_values == ref_values  # all inputs for unified scale quantizers must be equal