def apply_insert_after(model):
    """Insert a FakeQuantize layer after every layer of `model`.

    Each inserted quantizer uses a symmetric, per-tensor, 8-bit config.
    For functional models the converter is queried for the instance index
    of each node; sequential/subclassed models always use instance 0.

    :param model: Keras model to transform.
    :return: New model with FakeQuantize layers inserted after each layer.
    """
    converter = TFModelConverterFactory.create(model)
    layout = TFTransformationLayout()
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.SYMMETRIC,
                              signedness_to_force=None,
                              per_channel=False)
    is_functional = is_functional_model(model)

    for idx, layer in enumerate(model.layers):
        node_name = layer.name
        instance_idx = 0
        if is_functional:
            # Functional graphs may reuse a layer; resolve the concrete instance.
            _, info = converter.get_layer_info_for_node(node_name)
            instance_idx = info.instance_idx

        fq_layer = FakeQuantize(
            TFQuantizerSpec.from_config(qconfig, narrow_range=False, half_range=False),
            name=f'FakeQuantize_{idx}/{node_name}')
        layout.register(
            TFInsertionCommand(
                target_point=commands.TFAfterLayer(node_name,
                                                   instance_idx=instance_idx,
                                                   output_port_id=0),
                callable_object=fq_layer,
                priority=TransformationPriority.QUANTIZATION_PRIORITY))

    return TFModelTransformer(model).transform(layout)
def test_asymmetric_quantized_weights_equal_after_fix_applied(
        self, low, range_len, per_ch, init_w_as_middle_points, narrow_range):
    """Check the overflow fix keeps asymmetric-quantized weights unchanged.

    Quantizes a Dense layer's kernel with an asymmetric 8-bit quantizer in
    half-range (effectively 7-bit) mode, applies the overflow fix, and
    asserts the quantized outputs before and after the fix agree.
    """
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.ASYMMETRIC,
                              per_channel=per_ch)
    # half_range=True emulates the overflow-issue workaround state.
    qspec = TFQuantizerSpec.from_config(qconfig,
                                        narrow_range=narrow_range,
                                        half_range=True)
    op_name = 'quantizer'
    weight_attr = 'kernel'
    layer = tf.keras.layers.Dense(DIM_SPLIT)
    layer = NNCFWrapper(layer)
    quantizer_cls = NNCF_QUANTIZATION_OPERATIONS.get(qspec.mode)
    quantizer = quantizer_cls(op_name, qspec)
    layer.registry_weight_operation(weight_attr, quantizer)
    layer.build(1)
    # Set layer weights
    new_w = get_weights_for_overflow_issue_test(low, range_len, narrow_range,
                                                init_w_as_middle_points)
    layer.get_layer_weight(weight_attr).assign(new_w)
    # Set quantizer weights; per-channel mode needs one low/range value per channel.
    if per_ch:
        low = tf.repeat(tf.constant([low], dtype=tf.float32),
                        repeats=[DIM_SPLIT])
        range_len = tf.repeat(tf.constant([range_len], dtype=tf.float32),
                              repeats=[DIM_SPLIT])
    ops_weights = layer.ops_weights[op_name]
    ops_weights['input_low_var'].assign(low)
    ops_weights['input_range_var'].assign(range_len)
    # Forward pass with half-range (7-bit-like) quantization active.
    w_int7 = layer(tf.ones((1, 1))).numpy()
    if init_w_as_middle_points:
        # Weights placed on quantum midpoints must move by exactly half a quantum.
        quant_len = range_len / (128 - (2 if narrow_range else 1))
        assert (np.abs(np.abs(w_int7 - new_w) - quant_len / 2)
                < EPS).all(), 'Middle points calculated incorrectly'
    apply_overflow_fix_to_layer(layer, 'kernel', quantizer)
    assert not quantizer._half_range  # pylint: disable=protected-access
    # Forward pass after the fix: full 8-bit range, values must match.
    w_int8 = layer(tf.ones((1, 1))).numpy()
    check_quantized_values_equals(w_int7, w_int8, EPS, range_len, narrow_range)
def test_symmetric_quantized_weights_equal_after_fix_applied(
        self, per_ch, signedness_to_force, init_w_as_middle_points, narrow_range):
    """Check the overflow fix keeps symmetric-quantized weights unchanged.

    Quantizes a Dense layer's kernel with a symmetric 8-bit quantizer in
    half-range (effectively 7-bit) mode, applies the overflow fix, and
    asserts the quantized outputs before and after the fix agree.
    """
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.SYMMETRIC,
                              signedness_to_force=signedness_to_force,
                              per_channel=per_ch)
    # half_range=True emulates the overflow-issue workaround state.
    qspec = TFQuantizerSpec.from_config(qconfig,
                                        narrow_range=narrow_range,
                                        half_range=True)
    op_name = 'quantizer'
    weight_attr = 'kernel'
    layer = tf.keras.layers.Dense(DIM_SPLIT)
    layer = NNCFWrapper(layer)
    quantizer_cls = NNCF_QUANTIZATION_OPERATIONS.get(qspec.mode)
    quantizer = quantizer_cls(op_name, qspec)
    layer.registry_weight_operation(weight_attr, quantizer)
    layer.build(1)
    # Set layer weights. For the default scale of 1, a signed quantizer
    # covers [-1, 1] (range length 2), an unsigned one [0, 1] (length 1).
    ref_signed_var = -1 if signedness_to_force else 0
    ref_scale = 1
    low = ref_scale * ref_signed_var
    range_len = (1 - ref_signed_var) * ref_scale
    new_w = get_weights_for_overflow_issue_test(low, range_len, narrow_range,
                                                init_w_as_middle_points)
    layer.get_layer_weight(weight_attr).assign(new_w)
    # Check quantizer weights match the expected defaults.
    ops_weights = layer.ops_weights[op_name]
    assert (ops_weights['scale_var'].numpy() == ref_scale).all()
    assert (ops_weights['signed_var'].numpy() == ref_signed_var).all()
    # Forward pass with half-range (7-bit-like) quantization active.
    w_int7 = layer(tf.ones((1, 1))).numpy()
    if init_w_as_middle_points:
        # Weights placed on quantum midpoints must move by exactly half a quantum.
        quant_len = range_len / (128 - (2 if narrow_range else 1))
        # Use the shared EPS tolerance for consistency with the asymmetric test
        # (was a hard-coded 1e-6).
        assert (np.abs(np.abs(w_int7 - new_w) - quant_len / 2)
                < EPS).all(), 'Middle points calculated incorrectly'
    apply_overflow_fix_to_layer(layer, 'kernel', quantizer)
    assert not quantizer._half_range  # pylint: disable=protected-access
    # Forward pass after the fix: full 8-bit range, values must match.
    w_int8 = layer(tf.ones((1, 1))).numpy()
    check_quantized_values_equals(w_int7, w_int8, EPS, range_len, narrow_range)
def apply_insert_before(model):
    """Insert a FakeQuantize layer before every input of every layer of `model`.

    Each inserted quantizer uses a symmetric, per-tensor, 8-bit config.
    Input layers are skipped, since insertion before them is not supported.

    :param model: Keras model to transform.
    :return: New model with FakeQuantize layers inserted before layer inputs.
    """
    converter = TFModelConverterFactory.create(model)
    layout = TFTransformationLayout()
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.SYMMETRIC,
                              signedness_to_force=None,
                              per_channel=False)
    is_functional = is_functional_model(model)

    for idx, layer in enumerate(model.layers):
        # Insertion before input layer is not supported
        if isinstance(layer, layers.InputLayer):
            continue

        node_name = layer.name
        instance_idx = 0
        if is_functional:
            # Functional graphs may reuse a layer; resolve the concrete instance.
            _, info = converter.get_layer_info_for_node(node_name)
            instance_idx = info.instance_idx

        # A single-tensor input is normalized into a one-element list.
        layer_inputs = [layer.input] if isinstance(layer.input, tf.Tensor) else layer.input
        for port in range(len(layer_inputs)):
            fq_layer = FakeQuantize(
                TFQuantizerSpec.from_config(qconfig, narrow_range=False, half_range=False),
                name=f'FakeQuantize_{idx}.{port}/{node_name}')
            layout.register(
                TFInsertionCommand(
                    target_point=commands.TFBeforeLayer(node_name,
                                                        instance_idx=instance_idx,
                                                        input_port_id=port),
                    callable_object=fq_layer,
                    priority=TransformationPriority.QUANTIZATION_PRIORITY))

    return TFModelTransformer(model).transform(layout)
def _get_quantizer_setup(self, model: tf.keras.Model) -> TFQuantizationSetup:
    """Build the TF quantization setup for `model` from the propagation solution.

    Converts the model to an NNCF graph, obtains a quantizer placement
    solution, and translates every quantization point into a
    TFQuantizationPoint (weight or activation), collecting unified scale
    groups at the end.

    :param model: Keras model to analyze.
    :return: Populated TFQuantizationSetup.
    :raises RuntimeError: on custom-layer quantization points or on
        conflicting quantizer configs assigned to one layer.
    """
    converter = TFModelConverterFactory.create(model)
    nncf_graph = converter.convert()
    nodes = nncf_graph.get_all_nodes()
    # Warn about layers the algorithm cannot handle; they are left as-is.
    for node in nodes:
        if node.metatype in NOT_SUPPORT_LAYER_METATYPES:
            logger.warning(
                'The layer {} is not supported by the quantization algorithm'
                .format(get_original_name_and_instance_idx(node.node_name)[0]))

    quantizable_weighted_layer_nodes = self._get_quantizable_weighted_layer_nodes(
        nncf_graph)
    custom_layer_nodes = self._get_custom_layer_node_names(
        nncf_graph, converter)
    quantizer_setup = self._get_quantizer_propagation_solution(
        nncf_graph, quantizable_weighted_layer_nodes, custom_layer_nodes,
        model)
    setup = TFQuantizationSetup()

    quantized_layer_names_vs_qconfigs = {
    }  # type: Dict[str, QuantizerConfig]
    qp_id_to_index = {}  # type: Dict[QuantizationPointId, int]
    tf_setup_qp_index = 0
    applied_overflow_fix = False
    # First convolutions are special-cased for the overflow (half-range) fix.
    first_conv_nodes = get_first_nodes_of_type(nncf_graph, ['Conv2D'])
    for qp_id, qp in quantizer_setup.quantization_points.items():
        if qp.is_weight_quantization_point():
            target_node = nncf_graph.get_node_by_name(
                qp.insertion_point.target_node_name)
            is_custom, layer_info = converter.get_layer_info_for_node(
                target_node.node_name)
            if is_custom:
                raise RuntimeError(
                    "Quantizing custom layer weights is currently unsupported!"
                )
            layer_name = layer_info.layer_name
            qconfig = qp.qconfig
            # A layer may appear under several QPs; configs must agree.
            if layer_name in quantized_layer_names_vs_qconfigs:
                assigned_qconfig = quantized_layer_names_vs_qconfigs[
                    layer_name]
                if qconfig != assigned_qconfig:
                    raise RuntimeError(
                        f"Inconsistent quantizer configurations selected by solver for one and the "
                        f"same quantizable layer! Tried to assign {qconfig} to {layer_name} as "
                        f"specified by QP {qp_id}, but the layer already has quantizer "
                        f"config {assigned_qconfig} assigned to it!")
                continue  # The layer has already been quantized
            quantized_layer_names_vs_qconfigs[layer_name] = qconfig
            metatype = target_node.metatype
            assert issubclass(metatype, TFLayerWithWeightsMetatype)
            # NOTE(review): `qpoint` is rebound each iteration, but
            # add_quantization_point below runs once per QP — only the last
            # weight_def's point is registered if a metatype has several
            # weight definitions. Confirm this is intended.
            for weight_def in metatype.weight_definitions:
                op_name = self._get_quantizer_operation_name(
                    target_node.node_name, weight_def.weight_attr_name)
                self._op_names.append(op_name)
                half_range = self._get_half_range(qconfig, target_node,
                                                  first_conv_nodes)
                applied_overflow_fix = applied_overflow_fix or half_range
                quantizer_spec = TFQuantizerSpec.from_config(
                    qconfig,
                    narrow_range=not half_range,
                    half_range=half_range)
                target_point = TFLayerWeight(layer_info.layer_name,
                                             weight_def.weight_attr_name)
                qpoint = TFQuantizationPoint(op_name, quantizer_spec,
                                             target_point)
        else:
            assert qp.is_activation_quantization_point()
            ip = qp.insertion_point
            assert isinstance(ip, ActivationQuantizationInsertionPoint)
            target_node_name = ip.target_node_name
            input_port_id = ip.input_port_id
            fake_quantize_name = self._get_fake_quantize_name(
                target_node_name, input_port_id)
            quantizer_spec = TFQuantizerSpec.from_config(
                qp.qconfig, narrow_range=False, half_range=False)
            fake_quantize_layer = FakeQuantize(quantizer_spec,
                                               name=fake_quantize_name)
            self._op_names.append(fake_quantize_layer.op_name)
            is_custom, layer_info = converter.get_layer_info_for_node(
                target_node_name)
            if is_custom:
                raise RuntimeError(
                    "Quantizing custom layer activations is currently unsupported!"
                )
            # With a known input port the quantizer goes before the layer,
            # otherwise after it (output port 0).
            if input_port_id is not None:
                target_point = TFBeforeLayer(
                    layer_info.layer_name,
                    instance_idx=layer_info.instance_idx,
                    input_port_id=input_port_id)
            else:
                target_point = TFAfterLayer(
                    layer_info.layer_name,
                    instance_idx=layer_info.instance_idx,
                    output_port_id=0)
            qpoint = TFQuantizationPoint(fake_quantize_name, quantizer_spec,
                                         target_point)
        setup.add_quantization_point(qpoint)
        # Remember the index of each QP for unified-scale group generation.
        qp_id_to_index[qp_id] = tf_setup_qp_index
        tf_setup_qp_index += 1

    setup = self._generate_unified_scale_groups(model, quantizer_setup,
                                                qp_id_to_index, setup)
    self._raise_overflow_fix_warning(applied_overflow_fix)
    return setup
class TestPerLayerRangeInitTest:
    """Tests that per-layer range-init configs are resolved to the right layers.

    Each test case pairs a list of range-init config dicts with the
    RangeInitConfig expected for specific (layer, input type) combinations.
    """

    PerLayerRangeInitTestStruct = namedtuple(
        'PerLayerRangeInitTestStruct',
        ('range_init_config', 'layer_vs_expected_init_config'))

    # Shared quantizer spec used to build FakeQuantize layers in the cases below.
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.SYMMETRIC,
                              signedness_to_force=None,
                              per_channel=False)
    qspec = TFQuantizerSpec.from_config(qconfig,
                                        narrow_range=False,
                                        half_range=False)

    PER_LAYER_RANGE_INIT_TEST_CASES = [
        # Case 1: a single catch-all config applies to weights and activations.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type": "min_max",
                "num_init_samples": 1,
                "target_scopes": ["{re}.*"]
            }],
            layer_vs_expected_init_config=[
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2,
                                           3,
                                           activation="relu",
                                           name="conv1")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((FakeQuantize(qspec, name='fq1'), InputType.INPUTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1))
            ]),
        # Case 2: conv-scoped min_max config; everything else falls through to
        # a mean_min_max config that explicitly ignores conv scopes.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type": "min_max",
                "num_init_samples": 1,
                "target_scopes": ["{re}conv.*"]
            }, {
                "type": "mean_min_max",
                "num_init_samples": 2,
                "ignored_scopes": ["{re}conv.*"]
            }],
            layer_vs_expected_init_config=[
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2,
                                           3,
                                           activation="relu",
                                           name="conv1")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2,
                                           3,
                                           activation="relu",
                                           name="conv2")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((tf.keras.layers.Layer(name='conv2_0'), InputType.INPUTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((FakeQuantize(qspec, name='fq1'), InputType.INPUTS),
                 RangeInitConfig(init_type="mean_min_max",
                                 num_init_samples=2)),
            ]),
        # Case 3: configs scoped by quantizer group (weights vs activations),
        # including a percentile config with type-specific params.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type": "min_max",
                "num_init_samples": 1,
                "target_quantizer_group": "weights",
                "target_scopes":
                ["{re}TwoConvTestModel/Sequential\\[features\\]/.*"]
            }, {
                "type": "mean_min_max",
                "num_init_samples": 2,
                "ignored_scopes": [
                    "{re}TwoConvTestModel/Sequential\\[features\\]/.*",
                    "{re}/nncf_model_input_0"
                ]
            }, {
                "type": "threesigma",
                "num_init_samples": 1,
                "target_quantizer_group": "activations",
                "target_scopes": ["{re}/nncf_model_input_0"]
            }, {
                "type": "percentile",
                "num_init_samples": 10,
                "params": {
                    "min_percentile": "0.1",
                    "max_percentile": "99.9"
                },
                "target_quantizer_group": "activations",
                "target_scopes": [
                    "TwoConvTestModel/Sequential[features]/Sequential[1]/NNCFConv2d[0]/conv2d_0"
                ]
            }],
            layer_vs_expected_init_config=[
                ((tf.keras.layers.Layer(name='/nncf_model_input_0'),
                  InputType.INPUTS),
                 RangeInitConfig(init_type="threesigma", num_init_samples=1)),
                ((tf.keras.layers.Layer(
                    name="TwoConvTestModel/"
                    "Sequential[features]/Sequential[0]/NNCFConv2d[0]/conv2d_0"
                ), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((tf.keras.layers.Layer(
                    name="TwoConvTestModel/"
                    "Sequential[features]/Sequential[1]/NNCFConv2d[0]/conv2d_0"
                ), InputType.INPUTS),
                 RangeInitConfig(init_type="percentile",
                                 num_init_samples=10,
                                 init_type_specific_params={
                                     "min_percentile": "0.1",
                                     "max_percentile": "99.9"
                                 })),
            ])
    ]

    @staticmethod
    @pytest.fixture(params=PER_LAYER_RANGE_INIT_TEST_CASES)
    def per_layer_range_init_test_struct(request):
        # Parametrized fixture: each test case runs once per struct above.
        return request.param

    def test_get_init_config_for_quantization_point(
            self, wrap_dataloader, per_layer_range_init_test_struct):
        """Resolve each (layer, input type) pair and compare with the expected config."""
        per_layer_configs = []
        for sub_init_range_config_dict in per_layer_range_init_test_struct.range_init_config:
            per_layer_configs.append(
                PerLayerRangeInitConfig.from_dict(sub_init_range_config_dict))

        params = TFRangeInitParams(
            wrap_dataloader,
            '',
            global_init_config=None,
            per_layer_range_init_configs=per_layer_configs)

        for ((layer, input_type), ref_range_init_config) in \
                per_layer_range_init_test_struct.layer_vs_expected_init_config:
            assert params.get_init_config_for_quantization_point(
                layer, input_type) == ref_range_init_config