def apply_insert_after(model):
    """Insert a symmetric 8-bit FakeQuantize layer after every layer of `model`."""
    converter = TFModelConverterFactory.create(model)
    layout = TFTransformationLayout()
    quantizer_config = QuantizerConfig(num_bits=8,
                                       mode=QuantizationMode.SYMMETRIC,
                                       signedness_to_force=None,
                                       per_channel=False)

    is_functional = is_functional_model(model)
    for idx, layer in enumerate(model.layers):
        node_name = layer.name

        # Sequential/subclassed models have a single instance per layer;
        # functional models may reuse a layer, so ask the converter.
        instance_idx = 0
        if is_functional:
            _, info = converter.get_layer_info_for_node(node_name)
            instance_idx = info.instance_idx

        fq_spec = TFQuantizerSpec.from_config(quantizer_config,
                                              narrow_range=False,
                                              half_range=False)
        fq_layer = FakeQuantize(fq_spec,
                                name=f'FakeQuantize_{idx}/{node_name}')

        target = commands.TFAfterLayer(node_name,
                                       instance_idx=instance_idx,
                                       output_port_id=0)
        layout.register(
            TFInsertionCommand(
                target_point=target,
                callable_object=fq_layer,
                priority=TransformationPriority.QUANTIZATION_PRIORITY))

    return TFModelTransformer(model).transform(layout)
    def test_asymmetric_quantized_weights_equal_after_fix_applied(
            self, low, range_len, per_ch, init_w_as_middle_points,
            narrow_range):
        """Check that applying the overflow fix (switching the quantizer
        from the half-range int7 regime to full int8) leaves
        asymmetrically quantized weights numerically unchanged.
        """
        qconfig = QuantizerConfig(num_bits=8,
                                  mode=QuantizationMode.ASYMMETRIC,
                                  per_channel=per_ch)
        qspec = TFQuantizerSpec.from_config(qconfig,
                                            narrow_range=narrow_range,
                                            half_range=True)
        op_name = 'quantizer'
        weight_attr = 'kernel'

        layer = tf.keras.layers.Dense(DIM_SPLIT)
        layer = NNCFWrapper(layer)
        quantizer_cls = NNCF_QUANTIZATION_OPERATIONS.get(qspec.mode)
        quantizer = quantizer_cls(op_name, qspec)
        layer.registry_weight_operation(weight_attr, quantizer)
        layer.build(1)

        # Set layer weights
        new_w = get_weights_for_overflow_issue_test(low, range_len,
                                                    narrow_range,
                                                    init_w_as_middle_points)
        layer.get_layer_weight(weight_attr).assign(new_w)

        # Set quantizer weights; broadcast the scalar low/range across
        # channels when per-channel quantization is enabled.
        if per_ch:
            low = tf.repeat(tf.constant([low], dtype=tf.float32),
                            repeats=[DIM_SPLIT])
            range_len = tf.repeat(tf.constant([range_len], dtype=tf.float32),
                                  repeats=[DIM_SPLIT])

        ops_weights = layer.ops_weights[op_name]
        ops_weights['input_low_var'].assign(low)
        ops_weights['input_range_var'].assign(range_len)

        w_int7 = layer(tf.ones((1, 1))).numpy()
        if init_w_as_middle_points:
            # Mid-point weights must quantize to exactly half a quant away.
            quant_len = range_len / (128 - (2 if narrow_range else 1))
            assert (np.abs(np.abs(w_int7 - new_w) - quant_len / 2) <
                    EPS).all(), 'Middle points calculated incorrectly'

        # Use `weight_attr` instead of duplicating the 'kernel' literal.
        apply_overflow_fix_to_layer(layer, weight_attr, quantizer)
        assert not quantizer._half_range  # pylint: disable=protected-access
        w_int8 = layer(tf.ones((1, 1))).numpy()

        check_quantized_values_equals(w_int7, w_int8, EPS, range_len,
                                      narrow_range)
    def test_symmetric_quantized_weights_equal_after_fix_applied(
            self, per_ch, signedness_to_force, init_w_as_middle_points,
            narrow_range):
        """Check that applying the overflow fix (switching the quantizer
        from the half-range int7 regime to full int8) leaves
        symmetrically quantized weights numerically unchanged.
        """
        qconfig = QuantizerConfig(num_bits=8,
                                  mode=QuantizationMode.SYMMETRIC,
                                  signedness_to_force=signedness_to_force,
                                  per_channel=per_ch)
        qspec = TFQuantizerSpec.from_config(qconfig,
                                            narrow_range=narrow_range,
                                            half_range=True)
        op_name = 'quantizer'
        weight_attr = 'kernel'

        layer = tf.keras.layers.Dense(DIM_SPLIT)
        layer = NNCFWrapper(layer)
        quantizer_cls = NNCF_QUANTIZATION_OPERATIONS.get(qspec.mode)
        quantizer = quantizer_cls(op_name, qspec)
        layer.registry_weight_operation(weight_attr, quantizer)
        layer.build(1)

        # Set layer weights: derive the quantization range from the
        # expected default scale/sign of a freshly built symmetric quantizer.
        ref_signed_var = -1 if signedness_to_force else 0
        ref_scale = 1
        low = ref_scale * ref_signed_var
        range_len = (1 - ref_signed_var) * ref_scale
        new_w = get_weights_for_overflow_issue_test(low, range_len,
                                                    narrow_range,
                                                    init_w_as_middle_points)
        layer.get_layer_weight(weight_attr).assign(new_w)

        # Check quantizer weights match the assumed defaults
        ops_weights = layer.ops_weights[op_name]
        assert (ops_weights['scale_var'].numpy() == ref_scale).all()
        assert (ops_weights['signed_var'].numpy() == ref_signed_var).all()

        w_int7 = layer(tf.ones((1, 1))).numpy()
        if init_w_as_middle_points:
            quant_len = range_len / (128 - (2 if narrow_range else 1))
            # Use the shared EPS tolerance for consistency with the
            # asymmetric variant of this test (was a hard-coded 1e-6).
            assert (np.abs(np.abs(w_int7 - new_w) - quant_len / 2) <
                    EPS).all(), 'Middle points calculated incorrectly'

        # Use `weight_attr` instead of duplicating the 'kernel' literal.
        apply_overflow_fix_to_layer(layer, weight_attr, quantizer)
        assert not quantizer._half_range  # pylint: disable=protected-access
        w_int8 = layer(tf.ones((1, 1))).numpy()

        check_quantized_values_equals(w_int7, w_int8, EPS, range_len,
                                      narrow_range)
def apply_insert_before(model):
    """Insert a symmetric 8-bit FakeQuantize layer before every input port
    of every non-input layer of `model`.
    """
    converter = TFModelConverterFactory.create(model)

    layout = TFTransformationLayout()
    quantizer_config = QuantizerConfig(num_bits=8,
                                       mode=QuantizationMode.SYMMETRIC,
                                       signedness_to_force=None,
                                       per_channel=False)

    is_functional = is_functional_model(model)
    for idx, layer in enumerate(model.layers):
        # Insertion before input layer is not supported
        if isinstance(layer, layers.InputLayer):
            continue

        node_name = layer.name
        # Sequential/subclassed models have a single instance per layer;
        # functional models may reuse a layer, so ask the converter.
        instance_idx = 0
        if is_functional:
            _, info = converter.get_layer_info_for_node(node_name)
            instance_idx = info.instance_idx

        # Normalize to a list so single-input layers are handled uniformly.
        layer_inputs = layer.input
        if isinstance(layer_inputs, tf.Tensor):
            layer_inputs = [layer_inputs]

        for port, _ in enumerate(layer_inputs):
            fq_spec = TFQuantizerSpec.from_config(quantizer_config,
                                                  narrow_range=False,
                                                  half_range=False)
            fq_layer = FakeQuantize(
                fq_spec, name=f'FakeQuantize_{idx}.{port}/{node_name}')

            layout.register(
                TFInsertionCommand(
                    target_point=commands.TFBeforeLayer(
                        node_name,
                        instance_idx=instance_idx,
                        input_port_id=port),
                    callable_object=fq_layer,
                    priority=TransformationPriority.QUANTIZATION_PRIORITY))

    return TFModelTransformer(model).transform(layout)
# Example #5
    def _get_quantizer_setup(self,
                             model: tf.keras.Model) -> TFQuantizationSetup:
        """Build a TFQuantizationSetup for `model`.

        Converts the Keras model into an NNCF graph, runs the quantizer
        propagation solver, and translates the resulting quantization points
        into TF-specific ones: weight quantizer operations for weighted
        layers and FakeQuantize layers before/after activations.

        :param model: The Keras model to build the quantization setup for.
        :return: The populated TFQuantizationSetup.
        :raises RuntimeError: If a custom layer is selected for quantization,
            or if the solver assigned conflicting configs to one layer.
        """
        converter = TFModelConverterFactory.create(model)
        nncf_graph = converter.convert()
        nodes = nncf_graph.get_all_nodes()
        # Warn about layers the algorithm cannot quantize; they are skipped.
        for node in nodes:
            if node.metatype in NOT_SUPPORT_LAYER_METATYPES:
                logger.warning(
                    'The layer {} is not supported by the quantization algorithm'
                    .format(
                        get_original_name_and_instance_idx(node.node_name)[0]))

        quantizable_weighted_layer_nodes = self._get_quantizable_weighted_layer_nodes(
            nncf_graph)
        custom_layer_nodes = self._get_custom_layer_node_names(
            nncf_graph, converter)

        quantizer_setup = self._get_quantizer_propagation_solution(
            nncf_graph, quantizable_weighted_layer_nodes, custom_layer_nodes,
            model)
        setup = TFQuantizationSetup()

        # Tracks which layers already received a weight quantizer so a layer
        # reached through several graph nodes is quantized only once.
        quantized_layer_names_vs_qconfigs = {
        }  # type: Dict[str, QuantizerConfig]
        # Maps solver QP ids to indices in `setup`; consumed later when
        # generating unified-scale groups.
        qp_id_to_index = {}  # type: Dict[QuantizationPointId, int]
        tf_setup_qp_index = 0
        applied_overflow_fix = False
        first_conv_nodes = get_first_nodes_of_type(nncf_graph, ['Conv2D'])
        for qp_id, qp in quantizer_setup.quantization_points.items():
            if qp.is_weight_quantization_point():
                target_node = nncf_graph.get_node_by_name(
                    qp.insertion_point.target_node_name)
                is_custom, layer_info = converter.get_layer_info_for_node(
                    target_node.node_name)
                if is_custom:
                    raise RuntimeError(
                        "Quantizing custom layer weights is currently unsupported!"
                    )
                layer_name = layer_info.layer_name
                qconfig = qp.qconfig
                if layer_name in quantized_layer_names_vs_qconfigs:
                    assigned_qconfig = quantized_layer_names_vs_qconfigs[
                        layer_name]
                    if qconfig != assigned_qconfig:
                        raise RuntimeError(
                            f"Inconsistent quantizer configurations selected by solver for one and the "
                            f"same quantizable layer! Tried to assign {qconfig} to {layer_name} as "
                            f"specified by QP {qp_id}, but the layer already has quantizer "
                            f"config {assigned_qconfig} assigned to it!")
                    continue  # The layer has already been quantized
                quantized_layer_names_vs_qconfigs[layer_name] = qconfig
                metatype = target_node.metatype
                assert issubclass(metatype, TFLayerWithWeightsMetatype)
                # NOTE(review): only the `qpoint` built on the LAST iteration
                # of this loop is registered via add_quantization_point below;
                # if a metatype ever declares several weight_definitions, the
                # earlier ones are dropped — confirm this is intended.
                for weight_def in metatype.weight_definitions:
                    op_name = self._get_quantizer_operation_name(
                        target_node.node_name, weight_def.weight_attr_name)
                    self._op_names.append(op_name)

                    # The overflow ("half-range") fix restricts the effective
                    # range of selected quantizers; remember if it fired at all
                    # so a warning can be raised once at the end.
                    half_range = self._get_half_range(qconfig, target_node,
                                                      first_conv_nodes)
                    applied_overflow_fix = applied_overflow_fix or half_range
                    quantizer_spec = TFQuantizerSpec.from_config(
                        qconfig,
                        narrow_range=not half_range,
                        half_range=half_range)
                    target_point = TFLayerWeight(layer_info.layer_name,
                                                 weight_def.weight_attr_name)
                    qpoint = TFQuantizationPoint(op_name, quantizer_spec,
                                                 target_point)
            else:
                assert qp.is_activation_quantization_point()
                ip = qp.insertion_point
                assert isinstance(ip, ActivationQuantizationInsertionPoint)
                target_node_name = ip.target_node_name
                input_port_id = ip.input_port_id
                fake_quantize_name = self._get_fake_quantize_name(
                    target_node_name, input_port_id)
                quantizer_spec = TFQuantizerSpec.from_config(
                    qp.qconfig, narrow_range=False, half_range=False)
                fake_quantize_layer = FakeQuantize(quantizer_spec,
                                                   name=fake_quantize_name)
                self._op_names.append(fake_quantize_layer.op_name)

                is_custom, layer_info = converter.get_layer_info_for_node(
                    target_node_name)
                if is_custom:
                    raise RuntimeError(
                        "Quantizing custom layer activations is currently unsupported!"
                    )
                # A concrete input port means "quantize this input of the
                # layer"; otherwise quantize the layer's output (port 0).
                if input_port_id is not None:
                    target_point = TFBeforeLayer(
                        layer_info.layer_name,
                        instance_idx=layer_info.instance_idx,
                        input_port_id=input_port_id)
                else:
                    target_point = TFAfterLayer(
                        layer_info.layer_name,
                        instance_idx=layer_info.instance_idx,
                        output_port_id=0)
                qpoint = TFQuantizationPoint(fake_quantize_name,
                                             quantizer_spec, target_point)

            setup.add_quantization_point(qpoint)
            qp_id_to_index[qp_id] = tf_setup_qp_index
            tf_setup_qp_index += 1

        setup = self._generate_unified_scale_groups(model, quantizer_setup,
                                                    qp_id_to_index, setup)

        self._raise_overflow_fix_warning(applied_overflow_fix)

        return setup
# Example #6
class TestPerLayerRangeInitTest:
    """Tests resolution of per-layer range-init configurations.

    Each test case pairs a list of range-init config dicts with the
    RangeInitConfig expected to be resolved for specific (layer, input type)
    combinations via scope/quantizer-group matching rules.
    """
    # range_init_config: list of config dicts fed to TFRangeInitParams.
    # layer_vs_expected_init_config: ((layer, InputType), RangeInitConfig) pairs.
    PerLayerRangeInitTestStruct = namedtuple(
        'PerLayerRangeInitTestStruct',
        ('range_init_config', 'layer_vs_expected_init_config'))

    # Default symmetric 8-bit spec used to build FakeQuantize test layers.
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.SYMMETRIC,
                              signedness_to_force=None,
                              per_channel=False)
    qspec = TFQuantizerSpec.from_config(qconfig,
                                        narrow_range=False,
                                        half_range=False)

    PER_LAYER_RANGE_INIT_TEST_CASES = [
        # Case 1: a single catch-all target scope applies to every layer.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type": "min_max",
                "num_init_samples": 1,
                "target_scopes": ["{re}.*"]
            }],
            layer_vs_expected_init_config=[
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2,
                                           3,
                                           activation="relu",
                                           name="conv1")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((FakeQuantize(qspec, name='fq1'), InputType.INPUTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1))
            ]),
        # Case 2: conv-scoped config takes precedence; everything else falls
        # through to the config that ignores conv scopes.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type": "min_max",
                "num_init_samples": 1,
                "target_scopes": ["{re}conv.*"]
            }, {
                "type": "mean_min_max",
                "num_init_samples": 2,
                "ignored_scopes": ["{re}conv.*"]
            }],
            layer_vs_expected_init_config=[
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2,
                                           3,
                                           activation="relu",
                                           name="conv1")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2,
                                           3,
                                           activation="relu",
                                           name="conv2")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((tf.keras.layers.Layer(name='conv2_0'), InputType.INPUTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((FakeQuantize(qspec, name='fq1'), InputType.INPUTS),
                 RangeInitConfig(init_type="mean_min_max",
                                 num_init_samples=2)),
            ]),
        # Case 3: configs additionally constrained by target_quantizer_group
        # (weights vs activations) and exact (non-regex) scope names.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type":
                "min_max",
                "num_init_samples":
                1,
                "target_quantizer_group":
                "weights",
                "target_scopes":
                ["{re}TwoConvTestModel/Sequential\\[features\\]/.*"]
            }, {
                "type":
                "mean_min_max",
                "num_init_samples":
                2,
                "ignored_scopes": [
                    "{re}TwoConvTestModel/Sequential\\[features\\]/.*",
                    "{re}/nncf_model_input_0"
                ]
            }, {
                "type": "threesigma",
                "num_init_samples": 1,
                "target_quantizer_group": "activations",
                "target_scopes": ["{re}/nncf_model_input_0"]
            }, {
                "type":
                "percentile",
                "num_init_samples":
                10,
                "params": {
                    "min_percentile": "0.1",
                    "max_percentile": "99.9"
                },
                "target_quantizer_group":
                "activations",
                "target_scopes": [
                    "TwoConvTestModel/Sequential[features]/Sequential[1]/NNCFConv2d[0]/conv2d_0"
                ]
            }],
            layer_vs_expected_init_config=[
                ((tf.keras.layers.Layer(name='/nncf_model_input_0'),
                  InputType.INPUTS),
                 RangeInitConfig(init_type="threesigma",
                                 num_init_samples=1)),
                ((tf.keras.layers.Layer(
                    name="TwoConvTestModel/"
                    "Sequential[features]/Sequential[0]/NNCFConv2d[0]/conv2d_0"
                ), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((tf.keras.layers.Layer(
                    name="TwoConvTestModel/"
                    "Sequential[features]/Sequential[1]/NNCFConv2d[0]/conv2d_0"
                ), InputType.INPUTS),
                 RangeInitConfig(init_type="percentile",
                                 num_init_samples=10,
                                 init_type_specific_params={
                                     "min_percentile": "0.1",
                                     "max_percentile": "99.9"
                                 })),
            ])
    ]

    @staticmethod
    @pytest.fixture(params=PER_LAYER_RANGE_INIT_TEST_CASES)
    def per_layer_range_init_test_struct(request):
        """Parametrized fixture yielding each PerLayerRangeInitTestStruct."""
        return request.param

    def test_get_init_config_for_quantization_point(
            self, wrap_dataloader, per_layer_range_init_test_struct):
        """Check that TFRangeInitParams resolves the expected RangeInitConfig
        for every (layer, input type) pair of the test case.
        """
        per_layer_configs = []
        for sub_init_range_config_dict in per_layer_range_init_test_struct.range_init_config:
            per_layer_configs.append(
                PerLayerRangeInitConfig.from_dict(sub_init_range_config_dict))

        params = TFRangeInitParams(
            wrap_dataloader,
            '',
            global_init_config=None,
            per_layer_range_init_configs=per_layer_configs)

        for ((layer, input_type), ref_range_init_config) in \
                per_layer_range_init_test_struct.layer_vs_expected_init_config:
            assert params.get_init_config_for_quantization_point(
                layer, input_type) == ref_range_init_config