def make_op_exec_context_for_coalescing_test(scope_str: str) -> OperationExecutionContext:
    ia_op_exec_context = InputAgnosticOperationExecutionContext.from_str(scope_str)
    op_exec_context = OperationExecutionContext(ia_op_exec_context.operator_name,
                                                ia_op_exec_context.scope_in_model,
                                                ia_op_exec_context.call_order,
                                                [TensorMeta(0, 0, [1])])
    return op_exec_context
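
# Minimal usage sketch for the helper above; the scope string is hypothetical and only
# illustrates the "Scope/operator-name_call-order" format that .from_str() expects:
#
#   op_exec_context = make_op_exec_context_for_coalescing_test(
#       "SomeModel/NNCFConv2d[conv]/conv2d_0")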


def generate_qp(scope_str: str, target: QuantizerGroup,
                in_port_id: int = None) -> SingleConfigQuantizationPoint:
    if target is QuantizerGroup.WEIGHTS:
        ip = InsertionPoint(InsertionType.NNCF_MODULE_PRE_OP,
                            module_scope=Scope.from_str(scope_str))
    elif target is QuantizerGroup.ACTIVATIONS:
        ip = InsertionPoint(InsertionType.OPERATOR_POST_HOOK if in_port_id is None
                            else InsertionType.OPERATOR_PRE_HOOK,
                            ia_op_exec_context=InputAgnosticOperationExecutionContext.from_str(scope_str),
                            input_port_id=in_port_id)
    else:
        raise RuntimeError("Invalid QuantizerGroup: {}".format(target))
    return SingleConfigQuantizationPoint(ip, QuantizerConfig())
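
# Illustrative calls for generate_qp() above; the scope strings are hypothetical and not
# tied to any model in this file. A weight QP targets a module scope, while an activation
# QP targets an operation (post-hook by default, pre-hook when an input port is given):
#
#   weight_qp = generate_qp("SomeModel/NNCFConv2d[conv]", QuantizerGroup.WEIGHTS)
#   post_hook_qp = generate_qp("SomeModel/relu_0", QuantizerGroup.ACTIVATIONS)
#   pre_hook_qp = generate_qp("SomeModel/__add___0", QuantizerGroup.ACTIVATIONS, in_port_id=1)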


def test_quantizer_scale_linking():
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config['quantizer_setup_type'] = 'pattern_based'
    nncf_config["compression"]["quantize_outputs"] = True
    nncf_config["compression"]["quantize_inputs"] = False
    nncf_config["input_info"] = [
        {
            "sample_size": [1, 1, 1, 1],
        },
        {
            "sample_size": [1, 1, 1, 1],
        }
    ]
    nncf_config["compression"]["activations"] = {
        "linked_quantizer_scopes": [[
            # Note: assumes that quantizers are attached as a post-op to the specified operation
            "QuantizerLinkingTestModel/Path[path2]/__mul___0",
            "QuantizerLinkingTestModel/Path[path2]/__add___0",
        ]],
        "ignored_scopes": [
            # Ignore the path output averaging operations
            "QuantizerLinkingTestModel/__add___0",
            "QuantizerLinkingTestModel/__add___1",
            "QuantizerLinkingTestModel/__add___2",
        ]
    }

    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        QuantizerLinkingTestModel(), nncf_config)

    # 2 paths x 3 quantizers each, minus 1 since two of the quantizers in path2 are linked
    # and share a single quantizer module
    assert len(compression_ctrl.non_weight_quantizers) == 5

    test_input1 = torch.ones([1, 1, 1, 1])
    test_input2 = 2 * test_input1

    non_shared_mul_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path1]/__mul___0"))
    non_shared_add_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path1]/__add___0"))
    shared_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path2]/__add___0"))

    non_shared_mul_quantizer = compression_ctrl.non_weight_quantizers[
        non_shared_mul_quantizer_id].quantizer_module_ref
    non_shared_add_quantizer = compression_ctrl.non_weight_quantizers[
        non_shared_add_quantizer_id].quantizer_module_ref
    shared_quantizer = compression_ctrl.non_weight_quantizers[
        shared_quantizer_id].quantizer_module_ref

    old_scale = 765.0  # 765 == 3 * 255, so that the quantum is equal to 3
    with torch.no_grad():
        for quantizer in compression_ctrl.all_quantizations.values():
            quantizer.scale.fill_(old_scale)

    # Expected outputs without compression - 6, 12, 18.
    # Scale deliberately set to preserve these values under quantization
    uncompressed_expected_outputs = (6.0 * torch.ones([1]),
                                     12.0 * torch.ones([1]),
                                     18.0 * torch.ones([1]))
    outputs_with_shared_scale_1 = compressed_model(test_input1, test_input2)

    for uncomp_out, comp_out_1 in zip(uncompressed_expected_outputs, outputs_with_shared_scale_1):
        assert torch.allclose(uncomp_out, comp_out_1)

    # Specifically clip the shared quantizer's outputs by setting its scale to 1.0
    new_shared_scale = 1.0
    with torch.no_grad():
        shared_quantizer.scale.fill_(new_shared_scale)
    outputs_with_shared_scale_2 = compressed_model(test_input1, test_input2)

    # __add___0 outputs
    assert torch.allclose(outputs_with_shared_scale_2[0], 4.0 * torch.ones([1]))
    # __mul___0 outputs
    assert torch.allclose(outputs_with_shared_scale_2[1], 7.0 * torch.ones([1]))
    # __add___1 outputs
    assert torch.allclose(outputs_with_shared_scale_2[2], 12.0 * torch.ones([1]))

    # Clipping the non-shared quantizers at the same positions in path1 as the two linked
    # ones in path2, and in the same manner, is required to simulate the same grad input
    # for both the shared quantizer and the unshared ones
    with torch.no_grad():
        non_shared_mul_quantizer.scale.fill_(new_shared_scale)
        non_shared_add_quantizer.scale.fill_(new_shared_scale)
    final_output = compressed_model(test_input1, test_input2)[2]
    final_output.backward()

    # The shared quantizer participates in both linked operations, so its scale gradient
    # accumulates the contributions that the two standalone quantizers receive separately
    assert torch.allclose(
        shared_quantizer.scale.grad,
        non_shared_mul_quantizer.scale.grad + non_shared_add_quantizer.scale.grad)