Example #1
def make_op_exec_context_for_coalescing_test(scope_str: str) -> OperationExecutionContext:
    ia_op_exec_context = InputAgnosticOperationExecutionContext.from_str(scope_str)
    op_exec_context = OperationExecutionContext(ia_op_exec_context.operator_name,
                                                ia_op_exec_context.scope_in_model,
                                                ia_op_exec_context.call_order,
                                                [TensorMeta(0, 0, [1])])
    return op_exec_context
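
A minimal usage sketch, assuming the helper above is in scope; the scope string below is hypothetical and simply follows the same "<module scope>/<operator>_<call order>" format as the scope strings used in Example #3.

# Hypothetical scope string, for illustration only.
op_exec_context = make_op_exec_context_for_coalescing_test(
    "MyModel/Conv2d[conv1]/conv2d_0")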
Example #2
def generate_qp(scope_str: str,
                target: QuantizerGroup,
                in_port_id: Optional[int] = None) -> SingleConfigQuantizationPoint:
    if target is QuantizerGroup.WEIGHTS:
        ip = InsertionPoint(InsertionType.NNCF_MODULE_PRE_OP,
                            module_scope=Scope.from_str(scope_str))
    elif target is QuantizerGroup.ACTIVATIONS:
        ip = InsertionPoint(
            InsertionType.OPERATOR_POST_HOOK
            if in_port_id is None else InsertionType.OPERATOR_PRE_HOOK,
            ia_op_exec_context=InputAgnosticOperationExecutionContext.from_str(
                scope_str),
            input_port_id=in_port_id)
    else:
        raise RuntimeError("Unsupported quantizer group: {}".format(target))
    return SingleConfigQuantizationPoint(ip, QuantizerConfig())
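
A minimal usage sketch, assuming the helper above is in scope; the scope strings are hypothetical, in the same format as those used in Example #3.

# Hypothetical scope strings, for illustration only.
weight_qp = generate_qp("MyModel/NNCFConv2d[conv1]/conv2d_0", QuantizerGroup.WEIGHTS)
post_hook_qp = generate_qp("MyModel/ReLU[relu]/relu_0", QuantizerGroup.ACTIVATIONS)
pre_hook_qp = generate_qp("MyModel/__add___0", QuantizerGroup.ACTIVATIONS, in_port_id=0)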
Example #3
def test_quantizer_scale_linking():
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config['quantizer_setup_type'] = 'pattern_based'
    nncf_config["compression"]["quantize_outputs"] = True
    nncf_config["compression"]["quantize_inputs"] = False
    nncf_config["input_info"] = [{
        "sample_size": [1, 1, 1, 1],
    }, {
        "sample_size": [1, 1, 1, 1],
    }]
    nncf_config["compression"]["activations"] = {
        "linked_quantizer_scopes": [[
            # Note: Assuming that quantizers are attached as a post-op to the specified operation
            "QuantizerLinkingTestModel/Path[path2]/__mul___0",
            "QuantizerLinkingTestModel/Path[path2]/__add___0",
        ]],
        "ignored_scopes": [
            # Ignore path output averaging operations
            "QuantizerLinkingTestModel/__add___0",
            "QuantizerLinkingTestModel/__add___1",
            "QuantizerLinkingTestModel/__add___2",
        ]
    }

    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        QuantizerLinkingTestModel(), nncf_config)

    # 2 paths x 3 quantizers each, minus 1 because two quantizers in path2 are linked and share a single module
    assert len(compression_ctrl.non_weight_quantizers) == 5

    test_input1 = torch.ones([1, 1, 1, 1])
    test_input2 = 2 * test_input1

    non_shared_mul_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path1]/__mul___0"))

    non_shared_add_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path1]/__add___0"))

    shared_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path2]/__add___0"))

    non_shared_mul_quantizer = compression_ctrl.non_weight_quantizers[
        non_shared_mul_quantizer_id].quantizer_module_ref
    non_shared_add_quantizer = compression_ctrl.non_weight_quantizers[
        non_shared_add_quantizer_id].quantizer_module_ref
    shared_quantizer = compression_ctrl.non_weight_quantizers[
        shared_quantizer_id].quantizer_module_ref

    old_scale = 765.0  # so that the quantum is equal to 3
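    # (assuming an 8-bit quantizer: 765 / 255 == 3, so in-range values that are
    # multiples of 3 survive quantization unchanged)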
    with torch.no_grad():
        for quantizer in compression_ctrl.all_quantizations.values():
            quantizer.scale.fill_(old_scale)

    # Expected outputs without compression - 6, 12, 18. Scale deliberately set to preserve the values
    uncompressed_expected_outputs = (6.0 * torch.ones([1]),
                                     12.0 * torch.ones([1]),
                                     18.0 * torch.ones([1]))
    outputs_with_shared_scale_1 = compressed_model(test_input1, test_input2)

    for uncomp_out, comp_out_1 in zip(uncompressed_expected_outputs,
                                      outputs_with_shared_scale_1):
        assert torch.allclose(uncomp_out, comp_out_1)

    # Specifically clip the shared quantizer's outputs by setting scale to 1.0
    new_shared_scale = 1.0
    with torch.no_grad():
        shared_quantizer.scale.fill_(new_shared_scale)
    outputs_with_shared_scale_2 = compressed_model(test_input1, test_input2)
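
    # With the shared scale at 1.0, the two linked quantizers in path2 clamp their
    # outputs, while the path1 quantizers (still at scale 765) preserve their values,
    # which yields the per-output sums asserted below.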

    # __add___0 outputs
    assert torch.allclose(outputs_with_shared_scale_2[0],
                          4.0 * torch.ones([1]))
    # __mul___0 outputs
    assert torch.allclose(outputs_with_shared_scale_2[1],
                          7.0 * torch.ones([1]))
    # __add___1 outputs
    assert torch.allclose(outputs_with_shared_scale_2[2],
                          12.0 * torch.ones([1]))

    # Clip the non-shared quantizers in path1 (located at the same positions as the two
    # linked quantizers in path2) to the same scale, so that the shared and non-shared
    # quantizers receive identical gradient inputs during the backward pass
    with torch.no_grad():
        non_shared_mul_quantizer.scale.fill_(new_shared_scale)
        non_shared_add_quantizer.scale.fill_(new_shared_scale)
    final_output = compressed_model(test_input1, test_input2)[2]
    final_output.backward()
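
    # The shared quantizer participates at two points in path2, so its scale gradient
    # should accumulate both contributions, i.e. equal the sum of the gradients of the
    # two analogous non-shared quantizers in path1.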

    assert torch.allclose(
        shared_quantizer.scale.grad, non_shared_mul_quantizer.scale.grad +
        non_shared_add_quantizer.scale.grad)