def get_quantizer_config(self) -> QuantizerConfig:
    """Build the quantizer configuration corresponding to this object's settings.

    Mode is fixed to asymmetric and signedness is left unforced; bit width and
    per-channel granularity are taken from the instance attributes.
    """
    config_kwargs = {
        'num_bits': self.num_bits,
        'mode': QuantizationMode.ASYMMETRIC,
        'signedness_to_force': None,
        'per_channel': self.per_channel,
    }
    return QuantizerConfig(**config_kwargs)
def apply_insert_after(model):
    """Insert a FakeQuantize layer after every layer of `model`.

    :param model: A tf.keras model (sequential or functional).
    :return: The transformed model with one FakeQuantize registered after each layer.
    """
    converter = TFModelConverterFactory.create(model)
    transformations = TFTransformationLayout()
    # All inserted FQs use 8-bit symmetric per-tensor quantization.
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.SYMMETRIC,
                              signedness_to_force=None,
                              per_channel=False)
    functional_model = is_functional_model(model)
    for i, layer in enumerate(model.layers):
        original_node_name = layer.name
        if functional_model:
            # Functional models may reuse one layer at several graph nodes;
            # resolve which instance this node corresponds to.
            _, layer_info = converter.get_layer_info_for_node(original_node_name)
            instance_idx = layer_info.instance_idx
        else:
            instance_idx = 0
        fake_quantize_name = f'FakeQuantize_{i}/{original_node_name}'
        fake_quantize_layer = FakeQuantize(
            TFQuantizerSpec.from_config(qconfig, narrow_range=False, half_range=False),
            name=fake_quantize_name)
        transformations.register(
            TFInsertionCommand(
                target_point=commands.TFAfterLayer(original_node_name,
                                                   instance_idx=instance_idx,
                                                   output_port_id=0),
                callable_object=fake_quantize_layer,
                priority=TransformationPriority.QUANTIZATION_PRIORITY))
    transformer = TFModelTransformer(model)
    transformed_model = transformer.transform(transformations)
    return transformed_model
def get_qconf_from_hw_config_subdict(quantization_subdict: Dict):
    """Build a QuantizerConfig from a hardware-config quantization sub-dictionary.

    When explicit `level_low`/`level_high` values are present, they are validated
    against the canonical level ranges for the given bit width and mode, and the
    signedness is forced accordingly.

    :param quantization_subdict: Dict with keys 'bits', 'mode', 'granularity' and
        optionally 'level_low'/'level_high'.
    :return: The corresponding QuantizerConfig.
    """
    bits = quantization_subdict['bits']
    mode = HWConfig.get_quantization_mode_from_config_value(
        quantization_subdict['mode'])
    is_per_channel = HWConfig.get_is_per_channel_from_config_value(
        quantization_subdict['granularity'])
    signedness_to_force = None
    if 'level_low' in quantization_subdict and 'level_high' in quantization_subdict:
        signedness_to_force = False
        if mode == QuantizationMode.SYMMETRIC:
            # A symmetric range straddling zero implies signed quantization.
            if quantization_subdict['level_low'] < 0 < quantization_subdict['level_high']:
                signedness_to_force = True
            true_level_low, true_level_high, _ = quant.calculate_symmetric_level_ranges(
                bits, signed=True)
        else:
            # Asymmetric hardware configs force signedness.
            signedness_to_force = True
            true_level_low, true_level_high, _ = quant.calculate_asymmetric_level_ranges(
                bits)
        assert quantization_subdict['level_low'] == true_level_low, \
            'Invalid value of quantizer parameter `level_low`.\
 The parameter must be consistent with other parameters!'
        assert quantization_subdict['level_high'] == true_level_high, \
            'Invalid value of quantizer parameter `level_high`.\
 The parameter must be consistent with other parameters!'
    return QuantizerConfig(num_bits=bits,
                           mode=mode,
                           per_channel=is_per_channel,
                           signedness_to_force=signedness_to_force)
def _get_default_qconfig(self, constraints: QuantizationConstraints = None):
    """Return the default 8-bit symmetric per-tensor config.

    :param constraints: Optional constraints applied on top of the default config.
    :return: The (possibly constrained) QuantizerConfig.
    """
    base_config = QuantizerConfig(num_bits=8,
                                  mode=QuantizationMode.SYMMETRIC,
                                  signedness_to_force=None,
                                  per_channel=False)
    if constraints is None:
        return base_config
    return constraints.apply_constraints_to(base_config)
def validate_spy(self):
    """Validate the spied-on precision-init calls.

    Checks that one qconfig was produced per weight quantizer, that the chosen
    precisions differ from the default, and that the Hessian-trace estimator
    received a data loader with the expected batch size.
    """
    super().validate_spy()
    # Second positional argument of the spied call is the qconfig sequence.
    qconfig_sequence = self.get_qsetup_spy.call_args[0][1]
    assert len(qconfig_sequence) == self.n_weight_quantizers
    all_precisions = {qc.num_bits for qc in qconfig_sequence}
    # with default compression ratio = 1.5 all precisions should be different from the default one
    assert all_precisions != {QuantizerConfig().num_bits}

    # Sixth positional argument of the estimator call is the init data loader.
    init_data_loader = self.hessian_trace_estimator_spy.call_args[0][5]
    expected_batch_size = self.batch_size_init if self.batch_size_init else self.batch_size
    assert init_data_loader.batch_size == expected_batch_size
def generate_qp(node_name: NNCFNodeName,
                target: QuantizerGroup,
                input_port_id: int = None) -> SingleConfigQuantizationPoint:
    """Create a single-config quantization point for the given node.

    :param node_name: Name of the node to attach the quantization point to.
    :param target: Whether the point quantizes weights or activations.
    :param input_port_id: Input port for activation quantization points;
        ignored for weight quantization points.
    :return: A SingleConfigQuantizationPoint with the default QuantizerConfig.
    :raises RuntimeError: If `target` is not a recognized quantizer group.
    """
    if target is QuantizerGroup.WEIGHTS:
        qip = WeightQuantizationInsertionPoint(target_node_name=node_name)
    elif target is QuantizerGroup.ACTIVATIONS:
        qip = ActivationQuantizationInsertionPoint(target_node_name=node_name,
                                                   input_port_id=input_port_id)
    else:
        # Fix: the original raised a bare RuntimeError() with no message.
        raise RuntimeError(f'Unsupported quantizer group: {target}')
    return SingleConfigQuantizationPoint(qip, QuantizerConfig(), [node_name])
def from_state(cls, state: Dict[str, Any]) -> 'SingleConfigQuantizationPoint':
    """
    Creates the object from its state.

    :param state: Output of `get_state()` method.
    """
    insertion_point_cls_name = state[cls._state_names.INSERTION_POINT_CLASS_NAME]
    # Resolve the concrete insertion-point class by its registered name,
    # then deserialize the insertion point from its own sub-state.
    insertion_point_cls = CommonStatefulClassesRegistry.get_registered_class(insertion_point_cls_name)
    insertion_point = insertion_point_cls.from_state(state[cls._state_names.INSERTION_POINT])
    # Constructor keyword names must match the state-name constants, so the
    # kwargs dict is keyed by cls._state_names values rather than literals.
    kwargs = {
        cls._state_names.INSERTION_POINT: insertion_point,
        cls._state_names.QCONFIG: QuantizerConfig.from_state(state[cls._state_names.QCONFIG]),
        cls._state_names.NAMES_OF_QUANTIZED_OPS: state[cls._state_names.NAMES_OF_QUANTIZED_OPS]
    }
    return cls(**kwargs)
def apply_insert_before(model):
    """Insert a FakeQuantize layer before every input port of every layer of `model`.

    Input layers are skipped, since insertion before them is not supported.

    :param model: A tf.keras model (sequential or functional).
    :return: The transformed model.
    """
    converter = TFModelConverterFactory.create(model)
    transformations = TFTransformationLayout()
    # All inserted FQs use 8-bit symmetric per-tensor quantization.
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.SYMMETRIC,
                              signedness_to_force=None,
                              per_channel=False)
    functional_model = is_functional_model(model)
    for i, layer in enumerate(model.layers):
        # Insertion before input layer is not supported
        if isinstance(layer, layers.InputLayer):
            continue
        original_node_name = layer.name
        if functional_model:
            # Functional models may reuse one layer at several graph nodes;
            # resolve which instance this node corresponds to.
            _, layer_info = converter.get_layer_info_for_node(original_node_name)
            instance_idx = layer_info.instance_idx
        else:
            instance_idx = 0
        # A layer with several inputs gets one FQ per input port.
        inputs = [layer.input] if isinstance(layer.input, tf.Tensor) else layer.input
        for port, _ in enumerate(inputs):
            fake_quantize_name = f'FakeQuantize_{i}.{port}/{original_node_name}'
            fake_quantize_layer = FakeQuantize(
                TFQuantizerSpec.from_config(qconfig, narrow_range=False, half_range=False),
                name=fake_quantize_name)
            transformations.register(
                TFInsertionCommand(
                    target_point=commands.TFBeforeLayer(original_node_name,
                                                        instance_idx=instance_idx,
                                                        input_port_id=port),
                    callable_object=fake_quantize_layer,
                    priority=TransformationPriority.QUANTIZATION_PRIORITY))
    transformer = TFModelTransformer(model)
    transformed_model = transformer.transform(transformations)
    return transformed_model
def test_quantizer_setup_serialization():
    """Round-trip serialization checks for the quantizer-setup class hierarchy.

    Exercises TargetType, Scope, PTTargetPoint, insertion points, QuantizerConfig,
    quantization points and a full SingleConfigQuantizerSetup, and compares the
    setup's state against a ground-truth dict.
    """
    target_type_1 = TargetType.OPERATOR_POST_HOOK
    check_serialization(target_type_1)
    target_type_2 = TargetType.POST_LAYER_OPERATION
    check_serialization(target_type_2)

    # Scope must survive a str() round-trip.
    scope = Scope.from_str('MyConv/1[2]/3[4]/5')
    assert scope == Scope.from_str(str(scope))

    pttp_1 = PTTargetPoint(target_type_1, target_node_name=str(scope), input_port_id=7)
    check_serialization(pttp_1)

    wqip = WeightQuantizationInsertionPoint(target_node_name=DUMMY_STR)
    check_serialization(wqip)

    aqip = ActivationQuantizationInsertionPoint(target_node_name=DUMMY_STR, input_port_id=0)
    check_serialization(aqip)

    qc = QuantizerConfig()
    check_serialization(qc)

    scqp_1 = SingleConfigQuantizationPoint(
        wqip, qc, directly_quantized_operator_node_names=[str(scope)])
    check_serialization(scqp_1)

    scqp_2 = SingleConfigQuantizationPoint(
        aqip, qc, directly_quantized_operator_node_names=[str(scope)])
    check_serialization(scqp_2)

    scqs = SingleConfigQuantizerSetup()
    scqs.quantization_points = {0: scqp_1, 1: scqp_2}
    scqs.unified_scale_groups = {2: {0, 1}}
    scqs.shared_input_operation_set_groups = {2: {0, 1}}
    # Setups need a custom comparator since default equality is identity-based.
    check_serialization(scqs, comparator=single_config_quantizer_setup_cmp)
    assert scqs.get_state() == GROUND_TRUTH_STATE
def validate_spy(self):
    """Check that mixed-precision init picked non-default bit widths from the allowed set."""
    super().validate_spy()
    controller = self.builder_spy.spy_return
    chosen_bits = [qm.num_bits for qm in controller.all_quantizations.values()]
    # At least one precision must differ from the default QuantizerConfig bit width.
    assert set(chosen_bits) != {QuantizerConfig().num_bits}
    # Every assigned precision must come from the configured candidate set.
    assert all(bits in self.BITS for bits in chosen_bits)
class QuantizationBuilder(TFCompressionAlgorithmBuilder):
    """Builds quantization transformations (FakeQuantize insertions and weight
    quantizer ops) for a tf.keras model according to the NNCF config and the
    target hardware config."""

    _state_names = QBuilderStateNames

    # Default quantizer: 8-bit symmetric per-tensor, signedness unforced.
    DEFAULT_QCONFIG = QuantizerConfig(num_bits=8,
                                      mode=QuantizationMode.SYMMETRIC,
                                      signedness_to_force=None,
                                      per_channel=False)

    def __init__(self, config: NNCFConfig, should_init: bool = True):
        super().__init__(config, should_init)

        self.quantize_inputs = self._algo_config.get('quantize_inputs', True)
        self.quantize_outputs = self._algo_config.get('quantize_outputs', False)
        self._overflow_fix = self._algo_config.get('overflow_fix', 'enable')
        self._target_device = config.get('target_device', 'ANY')
        algo_config = self._get_algo_specific_config_section()
        if self._target_device == 'VPU' and 'preset' in algo_config:
            raise RuntimeError(
                "The VPU target device does not support presets.")

        self.global_quantizer_constraints = {}
        self.ignored_scopes_per_group = {}
        self.target_scopes_per_group = {}
        self._op_names = []

        for quantizer_group in QuantizerGroup:
            self._parse_group_params(self._algo_config, quantizer_group)

        if self.should_init:
            self._parse_init_params()

        self._range_initializer = None
        self._bn_adaptation = None
        self._quantizer_setup = None

        # TRIAL mode runs without a hardware config; otherwise load the one
        # matching the target device.
        self.hw_config = None
        if self._target_device != "TRIAL":
            hw_config_type = HWConfigType.from_str(
                HW_CONFIG_TYPE_TARGET_DEVICE_MAP[self._target_device])
            hw_config_path = TFHWConfig.get_path_to_hw_config(hw_config_type)
            self.hw_config = TFHWConfig.from_json(hw_config_path)

    def _load_state_without_name(self, state_without_name: Dict[str, Any]):
        """
        Initializes object from the state.

        :param state_without_name: Output of `get_state()` method.
        """
        quantizer_setup_state = state_without_name[
            self._state_names.QUANTIZER_SETUP]
        self._quantizer_setup = TFQuantizationSetup.from_state(
            quantizer_setup_state)

    def _get_state_without_name(self) -> Dict[str, Any]:
        """
        Returns a dictionary with Python data structures
        (dict, list, tuple, str, int, float, True, False, None) that represents
        state of the object.

        :return: state of the object
        """
        quantizer_setup_state = self._quantizer_setup.get_state()
        return {self._state_names.QUANTIZER_SETUP: quantizer_setup_state}

    def _parse_init_params(self):
        """Parse the range-initialization parameters from the NNCF config."""
        self._range_init_params = self._parse_range_init_params()

    def _parse_range_init_params(self) -> TFRangeInitParams:
        """Extract range-init params from the config, or None if absent."""
        range_init_params = extract_range_init_params(self.config)
        return TFRangeInitParams(
            **range_init_params) if range_init_params is not None else None

    def _parse_group_params(self, quant_config: Dict,
                            quantizer_group: QuantizerGroup) -> None:
        """Fill per-group constraints and ignored/target scopes for one quantizer group."""
        group_name = quantizer_group.value
        params_dict = {}
        params_dict_from_config = quant_config.get(group_name, {})
        preset = quant_config.get('preset')
        # NOTE(review): `and` binds tighter than `or` here, so the condition is
        # (device in [...]) or (device == 'TRIAL' and preset is not None) —
        # confirm this precedence is intended.
        if self._target_device in [
                'ANY', 'CPU', 'GPU'
        ] or self._target_device == 'TRIAL' and preset is not None:
            preset = QuantizationPreset.from_str(
                quant_config.get('preset', 'performance'))
            params_dict = preset.get_params_configured_by_preset(
                quantizer_group)
            # Explicit per-group settings win over preset-derived ones; warn
            # about the overlap.
            overrided_params = params_dict.keys(
            ) & params_dict_from_config.keys()
            if overrided_params:
                logger.warning(
                    'Preset quantizer parameters {} explicitly overrided.'.
                    format(overrided_params))
        params_dict.update(params_dict_from_config)
        self.global_quantizer_constraints[
            quantizer_group] = QuantizationConstraints.from_config_dict(
                params_dict)
        self.ignored_scopes_per_group[
            quantizer_group] = params_dict_from_config.get(
                'ignored_scopes', [])
        if self.ignored_scopes is not None:
            self.ignored_scopes_per_group[
                quantizer_group] += self.ignored_scopes
        target_scopes = params_dict_from_config.get('target_scopes')
        if target_scopes is None and self.target_scopes is not None:
            self.target_scopes_per_group[quantizer_group] = self.target_scopes
        else:
            self.target_scopes_per_group[quantizer_group] = target_scopes

    def _get_default_qconfig(
            self,
            constraints: QuantizationConstraints = None) -> QuantizerConfig:
        """Return a copy of DEFAULT_QCONFIG, with `constraints` applied if given."""
        qconfig = deepcopy(self.DEFAULT_QCONFIG)
        if constraints is not None:
            qconfig = constraints.apply_constraints_to(qconfig)
        return qconfig

    def _get_half_range(self, qconfig: QuantizerConfig, target_node: NNCFNode,
                        first_conv_nodes: List[NNCFNode]) -> bool:
        """Decide whether the overflow fix (use half of the 8-bit range) applies
        to the weight quantizer of `target_node`."""
        if self._target_device in ['CPU', 'ANY'] and qconfig.num_bits == 8:
            if self._overflow_fix == 'enable':
                return True
            if self._overflow_fix == 'first_layer_only':
                if target_node in first_conv_nodes:
                    return True
        return False

    def _create_quantizer(self, name: str,
                          qspec: TFQuantizerSpec) -> Quantizer:
        """Instantiate the quantizer operation class matching the spec's mode."""
        quantizer_cls = NNCF_QUANTIZATION_OPERATIONS.get(qspec.mode)
        return quantizer_cls(name, qspec)

    def _build_insertion_commands_for_quantizer_setup(
            self,
            quantizer_setup: TFQuantizationSetup) -> List[TFInsertionCommand]:
        """Turn a TFQuantizationSetup into a list of insertion commands,
        emitting one shared FakeQuantize per unified-scale group."""
        insertion_commands = []
        quantization_points = quantizer_setup.get_quantization_points()
        non_unified_scales_quantization_point_ids = set(
            range(len(quantization_points)))

        for unified_scales_group in quantizer_setup.get_unified_scale_groups():
            # One FQ object is shared across all points in the group so they
            # use a single scale.
            us_qp_id = unified_scales_group[0]
            qp = quantization_points[us_qp_id]
            quantizer_spec = qp.quantizer_spec
            op_name = qp.op_name + '/unified_scale_group'
            quantizer = FakeQuantize(quantizer_spec, name=op_name)
            self._op_names.append(quantizer.op_name)
            target_points = []
            for us_qp_id in unified_scales_group:
                non_unified_scales_quantization_point_ids.discard(us_qp_id)
                qp = quantization_points[us_qp_id]
                # All members of a unified-scale group must share one spec.
                assert quantizer_spec.get_state(
                ) == qp.quantizer_spec.get_state()
                target_points.append(qp.target_point)

            command = TFInsertionCommand(
                target_point=TFMultiLayerPoint(target_points),
                callable_object=quantizer,
                priority=TransformationPriority.QUANTIZATION_PRIORITY)
            insertion_commands.append(command)

        for qp_id in non_unified_scales_quantization_point_ids:
            quantization_point = quantization_points[qp_id]
            op_name = quantization_point.op_name
            quantizer_spec = quantization_point.quantizer_spec
            target_point = quantization_point.target_point
            if quantization_point.is_weight_quantization():
                quantizer = self._create_quantizer(op_name, quantizer_spec)
                self._op_names.append(op_name)
            else:
                quantizer = FakeQuantize(quantizer_spec, name=op_name)
                self._op_names.append(quantizer.op_name)
            command = TFInsertionCommand(
                target_point=target_point,
                callable_object=quantizer,
                priority=TransformationPriority.QUANTIZATION_PRIORITY)
            insertion_commands.append(command)
        return insertion_commands

    def get_transformation_layout(
            self, model: tf.keras.Model) -> TFTransformationLayout:
        """Build (or reuse) the quantizer setup and register all insertion commands."""
        transformations = TFTransformationLayout()
        if self._quantizer_setup is None:
            self._quantizer_setup = self._get_quantizer_setup(model)
        insertion_commands = self._build_insertion_commands_for_quantizer_setup(
            self._quantizer_setup)
        for command in insertion_commands:
            transformations.register(command)
        return transformations

    def _get_custom_layer_node_names(
            self, nncf_graph: NNCFGraph,
            converter: TFModelConverter) -> List[NNCFNodeName]:
        """Collect names of graph nodes that belong to custom (non-Keras-builtin) layers."""
        retval = []
        for node in nncf_graph.get_all_nodes():
            metatype = node.metatype
            if metatype in OUTPUT_NOOP_METATYPES:
                continue
            is_custom, _ = converter.get_layer_info_for_node(node.node_name)
            if is_custom:
                retval.append(node.node_name)
        return retval

    def _build_controller(self,
                          model: tf.keras.Model) -> 'QuantizationController':
        return QuantizationController(model, self.config, self._op_names)

    def initialize(self, model: tf.keras.Model) -> None:
        """Run range initialization (if configured) and batchnorm adaptation."""
        if self._range_init_params is not None:
            self._run_range_initialization(model)
        self._run_batchnorm_adaptation(model)

    def _run_range_initialization(self, model: tf.keras.Model) -> None:
        if self._range_initializer is None:
            self._range_initializer = RangeInitializer(self._range_init_params)
        self._range_initializer.run(model)

    def _run_batchnorm_adaptation(self, model: tf.keras.Model) -> None:
        if self._bn_adaptation is None:
            self._bn_adaptation = BatchnormAdaptationAlgorithm(
                **extract_bn_adaptation_init_params(self.config, self.name))
        self._bn_adaptation.run(model)

    def _get_quantizer_setup(self,
                             model: tf.keras.Model) -> TFQuantizationSetup:
        """Convert the model to an NNCF graph, solve quantizer placement, and
        translate the solution into a TF-specific quantization setup."""
        converter = TFModelConverterFactory.create(model)
        nncf_graph = converter.convert()
        nodes = nncf_graph.get_all_nodes()
        for node in nodes:
            if node.metatype in NOT_SUPPORT_LAYER_METATYPES:
                logger.warning(
                    'The layer {} is not supported by the quantization algorithm'
                    .format(
                        get_original_name_and_instance_idx(node.node_name)[0]))

        quantizable_weighted_layer_nodes = self._get_quantizable_weighted_layer_nodes(
            nncf_graph)
        custom_layer_nodes = self._get_custom_layer_node_names(
            nncf_graph, converter)

        quantizer_setup = self._get_quantizer_propagation_solution(
            nncf_graph, quantizable_weighted_layer_nodes, custom_layer_nodes,
            model)
        setup = TFQuantizationSetup()

        quantized_layer_names_vs_qconfigs = {
        }  # type: Dict[str, QuantizerConfig]
        qp_id_to_index = {}  # type: Dict[QuantizationPointId, int]
        tf_setup_qp_index = 0
        applied_overflow_fix = False
        first_conv_nodes = get_first_nodes_of_type(nncf_graph, ['Conv2D'])
        for qp_id, qp in quantizer_setup.quantization_points.items():
            if qp.is_weight_quantization_point():
                target_node = nncf_graph.get_node_by_name(
                    qp.insertion_point.target_node_name)
                is_custom, layer_info = converter.get_layer_info_for_node(
                    target_node.node_name)
                if is_custom:
                    raise RuntimeError(
                        "Quantizing custom layer weights is currently unsupported!"
                    )
                layer_name = layer_info.layer_name
                qconfig = qp.qconfig
                # A layer reused at several nodes must not receive conflicting
                # weight-quantizer configs.
                if layer_name in quantized_layer_names_vs_qconfigs:
                    assigned_qconfig = quantized_layer_names_vs_qconfigs[
                        layer_name]
                    if qconfig != assigned_qconfig:
                        raise RuntimeError(
                            f"Inconsistent quantizer configurations selected by solver for one and the "
                            f"same quantizable layer! Tried to assign {qconfig} to {layer_name} as "
                            f"specified by QP {qp_id}, but the layer already has quantizer "
                            f"config {assigned_qconfig} assigned to it!")
                    continue  # The layer has already been quantized
                quantized_layer_names_vs_qconfigs[layer_name] = qconfig
                metatype = target_node.metatype
                assert issubclass(metatype, TFLayerWithWeightsMetatype)
                for weight_def in metatype.weight_definitions:
                    op_name = self._get_quantizer_operation_name(
                        target_node.node_name, weight_def.weight_attr_name)
                    self._op_names.append(op_name)
                    half_range = self._get_half_range(qconfig, target_node,
                                                      first_conv_nodes)
                    applied_overflow_fix = applied_overflow_fix or half_range
                    quantizer_spec = TFQuantizerSpec.from_config(
                        qconfig,
                        narrow_range=not half_range,
                        half_range=half_range)
                    target_point = TFLayerWeight(layer_info.layer_name,
                                                 weight_def.weight_attr_name)
                    qpoint = TFQuantizationPoint(op_name, quantizer_spec,
                                                 target_point)
            else:
                assert qp.is_activation_quantization_point()
                ip = qp.insertion_point
                assert isinstance(ip, ActivationQuantizationInsertionPoint)
                target_node_name = ip.target_node_name
                input_port_id = ip.input_port_id
                fake_quantize_name = self._get_fake_quantize_name(
                    target_node_name, input_port_id)
                quantizer_spec = TFQuantizerSpec.from_config(
                    qp.qconfig, narrow_range=False, half_range=False)
                fake_quantize_layer = FakeQuantize(quantizer_spec,
                                                   name=fake_quantize_name)
                self._op_names.append(fake_quantize_layer.op_name)

                is_custom, layer_info = converter.get_layer_info_for_node(
                    target_node_name)
                if is_custom:
                    raise RuntimeError(
                        "Quantizing custom layer activations is currently unsupported!"
                    )
                # A concrete input port means pre-layer insertion; otherwise
                # the FQ is placed after the layer's (single) output.
                if input_port_id is not None:
                    target_point = TFBeforeLayer(
                        layer_info.layer_name,
                        instance_idx=layer_info.instance_idx,
                        input_port_id=input_port_id)
                else:
                    target_point = TFAfterLayer(
                        layer_info.layer_name,
                        instance_idx=layer_info.instance_idx,
                        output_port_id=0)
                qpoint = TFQuantizationPoint(fake_quantize_name,
                                             quantizer_spec, target_point)

            setup.add_quantization_point(qpoint)
            qp_id_to_index[qp_id] = tf_setup_qp_index
            tf_setup_qp_index += 1

        setup = self._generate_unified_scale_groups(model, quantizer_setup,
                                                    qp_id_to_index, setup)
        self._raise_overflow_fix_warning(applied_overflow_fix)
        return setup

    def _raise_overflow_fix_warning(self, applied_overflow_fix: bool):
        """Warn the user when the 7-bit overflow fix was actually applied."""
        if applied_overflow_fix:
            if self._overflow_fix == 'enable':
                quantizers_with_overflow_fix_str = 'all weight quantizers'
            elif self._overflow_fix == 'first_layer_only':
                quantizers_with_overflow_fix_str = 'first convolution weight quantizers'
            logger.warning(
                'The overflow issue fix will be applied. '
                'Now {} will effectively use only 7 bits out of '
                '8 bits. This resolves the overflow issue problem on AVX2 and AVX-512 machines. '
                'Please take a look at the documentation for a detailed information.'
                .format(quantizers_with_overflow_fix_str))

    def _generate_unified_scale_groups(
            self, model: tf.keras.Model,
            quantizer_setup: SingleConfigQuantizerSetup,
            qp_id_to_index: Dict[QuantizationPointId, int],
            setup: TFQuantizationSetup) -> TFQuantizationSetup:
        """Register the solver's unified-scale groups in the TF setup, ordered by model layer order."""
        # To properly set the instance indices for FQ need to save layers order like in the model config
        layer_names = [layer.name for layer in model.layers]
        for unified_group in quantizer_setup.unified_scale_groups.values():
            sorted_unified_group = []
            for qp_id in unified_group:
                qp = quantizer_setup.quantization_points[qp_id]
                qp_layer_name = qp.insertion_point.target_node_name
                original_name, _ = get_original_name_and_instance_idx(
                    qp_layer_name)
                layer_idx = layer_names.index(original_name)
                tf_setup_index = qp_id_to_index[qp_id]
                sorted_unified_group.append((tf_setup_index, layer_idx))

            sorted_unified_group = sorted(sorted_unified_group,
                                          key=lambda x: x[1])
            setup.register_unified_scale_group(
                [setup_index for setup_index, _ in sorted_unified_group])
        return setup

    def _get_quantizable_weighted_layer_nodes(
            self,
            nncf_graph: NNCFGraph) -> List[QuantizableWeightedLayerNode]:
        """Select weighted-layer nodes eligible for weight quantization and
        assign each a list of candidate qconfigs."""
        nodes_with_weights = []
        for node in nncf_graph.get_all_nodes():
            metatype = node.metatype
            if metatype in OUTPUT_NOOP_METATYPES:
                continue
            if not (metatype in QUANTIZATION_LAYER_METATYPES
                    and should_consider_scope(
                        node.node_name,
                        ignored_scopes=self.ignored_scopes_per_group[
                            QuantizerGroup.WEIGHTS],
                        target_scopes=None)):
                continue

            assert issubclass(metatype, TFLayerWithWeightsMetatype)
            nodes_with_weights.append(node)
        scope_overrides_dict = self._get_algo_specific_config_section().get(
            'scope_overrides', {})
        weighted_node_and_qconf_lists = assign_qconfig_lists_to_modules(
            nodes_with_weights,
            self.DEFAULT_QCONFIG,
            self.global_quantizer_constraints[QuantizerGroup.WEIGHTS],
            scope_overrides_dict,
            hw_config=self.hw_config)
        return [
            QuantizableWeightedLayerNode(node, qconf_list)
            for node, qconf_list in weighted_node_and_qconf_lists.items()
        ]

    def _get_quantizer_propagation_solution(
            self, nncf_graph: NNCFGraph,
            quantizable_weighted_layer_nodes: List[QuantizableWeightedLayerNode],
            custom_layer_node_names: List[NNCFNodeName],
            model: tf.keras.Model) -> SingleConfigQuantizerSetup:
        """Run the quantizer-propagation solver on the insertion-point graph
        and return a single-config quantizer setup."""
        ip_graph = InsertionPointGraph(
            nncf_graph,
            [qn.node.node_name for qn in quantizable_weighted_layer_nodes])
        pattern = TF_HW_FUSED_PATTERNS.get_full_pattern_graph()
        ip_graph = ip_graph.get_ip_graph_with_merged_hw_optimized_operations(
            pattern)

        # Input-preprocessing and custom layers are excluded from activation
        # quantization.
        input_preprocessing_nodes = self._get_input_preprocessing_nodes(
            nncf_graph, model)
        input_preprocessing_node_names = [
            n.node_name for n in input_preprocessing_nodes
        ]
        if custom_layer_node_names:
            logger.warning(
                'Custom layers [{}] '
                'will be ignored during quantization since it is not yet supported in NNCF'
                .format(", ".join([str(l) for l in custom_layer_node_names])))
        ignored_scopes_for_solver = self.ignored_scopes_per_group[QuantizerGroup.ACTIVATIONS] + \
            input_preprocessing_node_names + custom_layer_node_names

        solver = QuantizerPropagationSolver(
            ignored_scopes=ignored_scopes_for_solver,
            target_scopes=self.target_scopes_per_group[
                QuantizerGroup.ACTIVATIONS],
            hw_config=self.hw_config,
            default_trait_to_metatype_map=DEFAULT_TF_QUANT_TRAIT_TO_OP_DICT,
            default_qconfig_list=[
                self._get_default_qconfig(self.global_quantizer_constraints[
                    QuantizerGroup.ACTIVATIONS])
            ],
            quantizable_layer_nodes=quantizable_weighted_layer_nodes,
            global_constraints=self.global_quantizer_constraints,
            quantize_outputs=self.quantize_outputs)

        quantization_proposal = solver.run_on_ip_graph(ip_graph)
        multi_config_setup = quantization_proposal.quantizer_setup
        single_config_setup = multi_config_setup.select_first_qconfig_for_each_point(
        )
        finalized_proposal = quantization_proposal.finalize(
            single_config_setup)
        final_setup = solver.get_final_quantizer_setup(finalized_proposal)
        final_setup = self._handle_quantize_inputs_option(
            final_setup, nncf_graph)
        return final_setup

    def _handle_quantize_inputs_option(
            self, quantizer_setup: SingleConfigQuantizerSetup,
            nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
        """Drop activation quantizers placed on model inputs when
        `quantize_inputs` is disabled."""
        qp_ids_to_discard = []
        for qp_id, qp in quantizer_setup.quantization_points.items():
            if qp.is_activation_quantization_point():
                insertion_point = qp.insertion_point
                target_node = nncf_graph.get_node_by_name(
                    insertion_point.target_node_name)
                if not self.quantize_inputs and target_node.metatype in INPUT_NOOP_METATYPES:
                    qp_ids_to_discard.append(qp_id)
        for qp_id in qp_ids_to_discard:
            quantizer_setup.discard(qp_id, keep_shared_input_qps=True)
        return quantizer_setup

    def _get_input_preprocessing_nodes(
            self, nncf_graph: NNCFGraph,
            model: tf.keras.Model) -> List[NNCFNode]:
        """Find chains of single-input elementwise nodes hanging off the model
        inputs; these are treated as preprocessing and not quantized."""
        retval = []

        def traverse_fn(
                node: NNCFNode, preprocessing_nodes: List[NNCFNode]
        ) -> Tuple[bool, List[NNCFNode]]:
            is_finished = True
            successors = nncf_graph.get_next_nodes(node)
            if len(successors) == 1:
                successor = next(iter(successors))
                # It is necessary to determine the number of input nodes from the model
                # in order to correctly count the duplicated edges
                original_name, _ = get_original_name_and_instance_idx(
                    successor.node_name)
                layer = model.get_layer(name=original_name)
                num_previous_nodes = len(layer.input) if isinstance(
                    layer.input, list) else 1
                if successor.metatype in ELEMENTWISE_LAYER_METATYPES and num_previous_nodes == 1:
                    preprocessing_nodes.append(successor)
                    is_finished = False
            return is_finished, preprocessing_nodes

        for nncf_node in nncf_graph.get_input_nodes():
            preprocessing_nodes_for_this_input = nncf_graph.traverse_graph(
                nncf_node, traverse_fn)
            retval += preprocessing_nodes_for_this_input
        return retval

    def _get_quantized_nodes_for_output(
            self,
            nncf_graph: NNCFGraph,
            insertion_points: List[str],
            node_key: str,
            quantized_nodes_for_output: List[NNCFNode] = None
    ) -> List[NNCFNode]:
        """Walk backwards from `node_key` through precision-agnostic layers and
        collect predecessor nodes that carry a quantizer insertion point."""
        nncf_node = nncf_graph.get_node_by_key(node_key)
        if quantized_nodes_for_output is None:
            if node_key in insertion_points:
                return [nncf_node]
            quantized_nodes_for_output = []

        for predecessor in nncf_graph.get_previous_nodes(nncf_node):
            pred_node_key = nncf_graph.get_node_key_by_id(predecessor.node_id)
            if len(nncf_graph.get_next_nodes(predecessor)) > 1:
                logger.warning(
                    'Removing of FakeQuantize after layer {} '
                    'with multiple outputs is not fully supported'.format(
                        predecessor.node_name))
            if predecessor.metatype in LAYER_METATYPES_AGNOSTIC_TO_DATA_PRECISION:
                # Recurse through layers that do not change data precision.
                self._get_quantized_nodes_for_output(
                    nncf_graph, insertion_points, pred_node_key,
                    quantized_nodes_for_output)
            elif nncf_graph.get_node_key_by_id(
                    predecessor.node_id) in insertion_points:
                quantized_nodes_for_output.append(predecessor)
        return quantized_nodes_for_output

    def _get_fake_quantize_name(self,
                                node_name: NNCFNodeName,
                                input_port_id: int = None) -> str:
        """Compose the FakeQuantize layer name from the node name, layer
        instance index, and (optionally) the input port."""
        original_node_name, instance_idx = get_original_name_and_instance_idx(
            node_name)
        fq_name = '{}/fake_quantize'.format(original_node_name)
        if instance_idx != 0:
            fq_name += f"_{instance_idx}"
        if input_port_id is not None:
            fq_name += f"_I{input_port_id}"
        return fq_name

    def _get_quantizer_operation_name(self, layer_name, weight_attr_name):
        return f'{layer_name}_{weight_attr_name}_quantizer'
class TestPerLayerRangeInitTest:
    """Tests that per-layer range-init configs are matched to the correct layer
    and input type (weights vs inputs) by TFRangeInitParams."""

    # (range_init_config, layer_vs_expected_init_config) pairs for parametrization.
    PerLayerRangeInitTestStruct = namedtuple(
        'PerLayerRangeInitTestStruct',
        ('range_init_config', 'layer_vs_expected_init_config'))

    # Shared quantizer spec used to build FakeQuantize layers in the fixtures.
    qconfig = QuantizerConfig(num_bits=8,
                              mode=QuantizationMode.SYMMETRIC,
                              signedness_to_force=None,
                              per_channel=False)
    qspec = TFQuantizerSpec.from_config(qconfig,
                                        narrow_range=False,
                                        half_range=False)

    PER_LAYER_RANGE_INIT_TEST_CASES = [
        # Case 1: a single catch-all scope applies to every layer.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type": "min_max",
                "num_init_samples": 1,
                "target_scopes": ["{re}.*"]
            }],
            layer_vs_expected_init_config=[
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2, 3, activation="relu",
                                           name="conv1")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((FakeQuantize(qspec, name='fq1'), InputType.INPUTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1))
            ]),
        # Case 2: conv-named layers get min_max; everything else mean_min_max.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type": "min_max",
                "num_init_samples": 1,
                "target_scopes": ["{re}conv.*"]
            }, {
                "type": "mean_min_max",
                "num_init_samples": 2,
                "ignored_scopes": ["{re}conv.*"]
            }],
            layer_vs_expected_init_config=[
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2, 3, activation="relu",
                                           name="conv1")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((NNCFWrapper(
                    tf.keras.layers.Conv2D(2, 3, activation="relu",
                                           name="conv2")), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((tf.keras.layers.Layer(name='conv2_0'), InputType.INPUTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((FakeQuantize(qspec, name='fq1'), InputType.INPUTS),
                 RangeInitConfig(init_type="mean_min_max",
                                 num_init_samples=2)),
            ]),
        # Case 3: configs additionally constrained by target_quantizer_group.
        PerLayerRangeInitTestStruct(
            range_init_config=[{
                "type": "min_max",
                "num_init_samples": 1,
                "target_quantizer_group": "weights",
                "target_scopes":
                ["{re}TwoConvTestModel/Sequential\\[features\\]/.*"]
            }, {
                "type": "mean_min_max",
                "num_init_samples": 2,
                "ignored_scopes": [
                    "{re}TwoConvTestModel/Sequential\\[features\\]/.*",
                    "{re}/nncf_model_input_0"
                ]
            }, {
                "type": "threesigma",
                "num_init_samples": 1,
                "target_quantizer_group": "activations",
                "target_scopes": ["{re}/nncf_model_input_0"]
            }, {
                "type": "percentile",
                "num_init_samples": 10,
                "params": {
                    "min_percentile": "0.1",
                    "max_percentile": "99.9"
                },
                "target_quantizer_group": "activations",
                "target_scopes": [
                    "TwoConvTestModel/Sequential[features]/Sequential[1]/NNCFConv2d[0]/conv2d_0"
                ]
            }],
            layer_vs_expected_init_config=[
                ((tf.keras.layers.Layer(name='/nncf_model_input_0'),
                  InputType.INPUTS),
                 RangeInitConfig(init_type="threesigma", num_init_samples=1)),
                ((tf.keras.layers.Layer(
                    name="TwoConvTestModel/"
                    "Sequential[features]/Sequential[0]/NNCFConv2d[0]/conv2d_0"
                ), InputType.WEIGHTS),
                 RangeInitConfig(init_type="min_max", num_init_samples=1)),
                ((tf.keras.layers.Layer(
                    name="TwoConvTestModel/"
                    "Sequential[features]/Sequential[1]/NNCFConv2d[0]/conv2d_0"
                ), InputType.INPUTS),
                 RangeInitConfig(init_type="percentile",
                                 num_init_samples=10,
                                 init_type_specific_params={
                                     "min_percentile": "0.1",
                                     "max_percentile": "99.9"
                                 })),
            ])
    ]

    @staticmethod
    @pytest.fixture(params=PER_LAYER_RANGE_INIT_TEST_CASES)
    def per_layer_range_init_test_struct(request):
        return request.param

    def test_get_init_config_for_quantization_point(
            self, wrap_dataloader, per_layer_range_init_test_struct):
        """For each (layer, input type) pair, the resolved range-init config
        must equal the expected one."""
        per_layer_configs = []
        for sub_init_range_config_dict in per_layer_range_init_test_struct.range_init_config:
            per_layer_configs.append(
                PerLayerRangeInitConfig.from_dict(sub_init_range_config_dict))
        params = TFRangeInitParams(
            wrap_dataloader,
            '',
            global_init_config=None,
            per_layer_range_init_configs=per_layer_configs)
        for ((layer, input_type), ref_range_init_config) in \
                per_layer_range_init_test_struct.layer_vs_expected_init_config:
            assert params.get_init_config_for_quantization_point(
                layer, input_type) == ref_range_init_config
def test_quantizer_ordering(requanting_qconf: QuantizerConfig,
                            base_qconf: QuantizerConfig,
                            is_valid_requant: bool):
    """Check is_valid_requantization_for against the expected verdict for the
    parametrized config pair."""
    assert requanting_qconf.is_valid_requantization_for(base_qconf) == is_valid_requant
matches = set() for aq_id, aq_info in qctrl.non_weight_quantizers.items(): for target_point in aq_info.affected_insertions: if qinput_scope_str in str(target_point.target_node_name): matches.add(aq_id) assert len(matches) == 1 input_aq_id = next(iter(matches)) quantizer = qctrl.non_weight_quantizers[ input_aq_id].quantizer_module_ref assert isinstance(quantizer, SymmetricQuantizer) @pytest.mark.parametrize( ('requanting_qconf', 'base_qconf', 'is_valid_requant'), ( (QuantizerConfig(), QuantizerConfig(), True), (QuantizerConfig(num_bits=8), QuantizerConfig(num_bits=6), False), (QuantizerConfig(num_bits=6), QuantizerConfig(num_bits=8), True), # Technically placing a per-channel quantization after a per-tensor should not break # anything or limit the set of output values w.r.t to a single per-tensor quantizer. (QuantizerConfig(num_bits=6, per_channel=True), QuantizerConfig(num_bits=6, per_channel=False), True), (QuantizerConfig(num_bits=6, per_channel=False), QuantizerConfig(num_bits=6, per_channel=True), True), (QuantizerConfig(num_bits=5, per_channel=True), QuantizerConfig(num_bits=6, per_channel=False), True), (QuantizerConfig(num_bits=5, per_channel=False), QuantizerConfig(num_bits=6, per_channel=True), True), (QuantizerConfig(num_bits=5, mode=QuantizationMode.SYMMETRIC), QuantizerConfig(num_bits=5, mode=QuantizationMode.ASYMMETRIC), True),