Example #1
    def _apply_minmax_init(self,
                           min_values,
                           max_values,
                           log_module_name: str = None):
        if torch.any(torch.eq(min_values, np.inf)) or torch.any(
                torch.eq(max_values, -np.inf)):
            raise AttributeError(
                'Statistics is not collected for {}'.format(log_module_name))
        sign = torch.any(torch.lt(min_values, 0))
        if self._signedness_to_force is not None and sign != self._signedness_to_force:
            nncf_logger.warning("Forcing signed to {} for module {}".format(
                self._signedness_to_force, log_module_name))
            sign = self._signedness_to_force
        self.signed = int(sign)

        abs_max = torch.max(torch.abs(max_values), torch.abs(min_values))
        SCALE_LOWER_THRESHOLD = 0.1
        mask = torch.gt(abs_max, SCALE_LOWER_THRESHOLD)
        self._scale_param_storage.data = torch.where(
            mask, abs_max,
            SCALE_LOWER_THRESHOLD * torch.ones_like(self._scale_param_storage))
        if self._is_using_log_scale_storage:
            self._scale_param_storage.data.log_()

        nncf_logger.info("Set sign: {} and scale: {} for {}".format(
            self.signed, get_flat_tensor_contents_string(self.scale),
            log_module_name))
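
Below is a minimal, self-contained sketch of the same min/max-to-scale logic shown above (illustrative only, not part of the NNCF API): signedness is inferred from the minimum statistics, and per-channel scales below a lower threshold are clamped to it.

import torch

# Hypothetical collected per-channel statistics.
min_values = torch.tensor([-0.5, 0.0, -2.0])
max_values = torch.tensor([1.2, 0.05, 3.0])

signed = bool(torch.any(torch.lt(min_values, 0)))  # True: at least one negative minimum
abs_max = torch.max(torch.abs(max_values), torch.abs(min_values))

SCALE_LOWER_THRESHOLD = 0.1
# Clamp channels with a very small dynamic range, as in _apply_minmax_init above.
scale = torch.where(abs_max > SCALE_LOWER_THRESHOLD,
                    abs_max,
                    torch.full_like(abs_max, SCALE_LOWER_THRESHOLD))
# scale -> tensor([1.2000, 0.1000, 3.0000])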
Example #2
    def _get_quantized_nodes_for_output(
            self,
            nncf_graph: NNCFGraph,
            insertion_points: List[str],
            node_key: str,
            quantized_nodes_for_output: List[NNCFNode] = None
    ) -> List[NNCFNode]:
        nncf_node = nncf_graph.get_node_by_key(node_key)
        if quantized_nodes_for_output is None:
            if node_key in insertion_points:
                return [nncf_node]
            quantized_nodes_for_output = []

        for predecessor in nncf_graph.get_previous_nodes(nncf_node):
            pred_node_key = nncf_graph.get_node_key_by_id(predecessor.node_id)
            if len(nncf_graph.get_next_nodes(predecessor)) > 1:
                logger.warning(
                    'Removing of FakeQuantize after layer {} '
                    'with multiple outputs is not fully supported'.format(
                        predecessor.node_name))
            if predecessor.metatype in LAYER_METATYPES_AGNOSTIC_TO_DATA_PRECISION:
                self._get_quantized_nodes_for_output(
                    nncf_graph, insertion_points, pred_node_key,
                    quantized_nodes_for_output)
            elif nncf_graph.get_node_key_by_id(
                    predecessor.node_id) in insertion_points:
                quantized_nodes_for_output.append(predecessor)
        return quantized_nodes_for_output
Example #3
    def handle_problematic(self, is_resume: bool, are_all_loaded_params_matched: bool):
        """
        Reports errors that occurred while matching state_dict_to_load parameters to the model's state_dict ones.
        It raises an error if is_resume is True and prints a warning otherwise. The report happens if
        state_dict_to_load has parameters that could not be matched to model parameters or if parameter shapes do
        not match. If some parameters required by the model are missing from state_dict_to_load, they are reported
        only if they are non-optional, or when not all parameters from state_dict_to_load were matched.
        :param is_resume: Determines the behavior when the function cannot successfully match parameters during loading.
        :param are_all_loaded_params_matched: whether all parameters to load match the model parameters.
        """
        error_msgs = []

        def add_error_msg(name, keys_):
            error_msgs.insert(
                0, '{} key(s):\n{}. '.format(name,
                                             ',\n'.join('\t\t"{}"'.format(k) for k in keys_)))

        for key_status, keys in self._keys.items():
            is_missing = key_status == ProcessedKeyStatus.MISSING
            erroneous = key_status in (ProcessedKeyStatus.SIZE_MISMATCHED, ProcessedKeyStatus.UNEXPECTED)
            if keys and (erroneous or is_missing and (is_resume or not are_all_loaded_params_matched)):
                add_error_msg(key_status.value, keys)
        if error_msgs:
            error_msg = 'Error(s) when loading model parameters:\n\t{}'.format("\n\t".join(error_msgs))
            if is_resume:
                raise RuntimeError(error_msg)
            nncf_logger.warning(error_msg)
Example #4
 def _parse_group_params(self, quant_config: Dict,
                         quantizer_group: QuantizerGroup) -> None:
     group_name = quantizer_group.value
     params_dict = {}
     params_dict_from_config = quant_config.get(group_name, {})
     preset = quant_config.get('preset')
     if self._target_device in [
             'ANY', 'CPU', 'GPU'
     ] or self._target_device == 'TRIAL' and preset is not None:
         preset = QuantizationPreset.from_str(
             quant_config.get('preset', 'performance'))
         params_dict = preset.get_params_configured_by_preset(
             quantizer_group)
         overridden_params = params_dict.keys() & params_dict_from_config.keys()
         if overridden_params:
             logger.warning(
                 'Preset quantizer parameters {} explicitly overridden.'.format(
                     overridden_params))
     params_dict.update(params_dict_from_config)
     self.global_quantizer_constraints[
         quantizer_group] = QuantizationConstraints.from_config_dict(
             params_dict)
     self.ignored_scopes_per_group[
         quantizer_group] = params_dict_from_config.get(
             'ignored_scopes', [])
     if self.ignored_scopes is not None:
         self.ignored_scopes_per_group[
             quantizer_group] += self.ignored_scopes
     target_scopes = params_dict_from_config.get('target_scopes')
     if target_scopes is None and self.target_scopes is not None:
         self.target_scopes_per_group[quantizer_group] = self.target_scopes
     else:
         self.target_scopes_per_group[quantizer_group] = target_scopes
Example #5
    def get_transformation_layout(self, model):
        nxmodel = convert_keras_model_to_nxmodel(model)
        for node_name, node in nxmodel.nodes.items():
            if node['type'] in NOT_SUPPORT_LAYERS:
                logger.warning(
                    'The layer {} is not supported by the quantization algorithm'
                    .format(
                        get_original_name_and_instance_index(node_name)[0]))

        transformations = TFTransformationLayout()
        qconfig = self._get_default_qconfig(
            self.global_quantizer_constraints[WEIGHTS])
        shared_nodes = set()
        for node_name, node in nxmodel.nodes.items():
            original_node_name, _ = get_original_name_and_instance_index(
                node_name)
            if node['type'] not in QUANTIZATION_LAYERS \
                    or is_ignored(node_name, self.ignored_scopes_per_group[WEIGHTS]) \
                    or original_node_name in shared_nodes:
                continue

            if node['is_shared']:
                shared_nodes.add(original_node_name)

            weight_attr_name = QUANTIZATION_LAYERS[
                node['type']][WEIGHT_ATTR_NAME]
            op_name = self._get_quantizer_operation_name(
                node_name, weight_attr_name)

            operation = self._create_quantizer(
                op_name,
                TFQuantizerSpec.from_config(qconfig,
                                            narrow_range=True,
                                            half_range=False))

            transformations.register(
                TFInsertionCommand(
                    target_point=TFLayerWeight(original_node_name,
                                               weight_attr_name),
                    callable_object=operation,
                    priority=TransformationPriority.QUANTIZATION_PRIORITY))

        insertion_points = self._find_insertion_points(nxmodel)
        qconfig = self._get_default_qconfig(
            self.global_quantizer_constraints[ACTIVATIONS])
        for original_node_name, instance_index in insertion_points:
            fake_quantize_name = self._get_fake_quantize_name(
                original_node_name, instance_index)
            fake_quantize_layer = FakeQuantize(TFQuantizerSpec.from_config(
                qconfig, narrow_range=False, half_range=False),
                                               name=fake_quantize_name)

            transformations.register(
                TFInsertionCommand(
                    target_point=TFAfterLayer(original_node_name,
                                              instance_index),
                    callable_object=fake_quantize_layer,
                    priority=TransformationPriority.QUANTIZATION_PRIORITY))

        return transformations
Example #6
    def _maybe_should_skip(self) -> None:
        """
        Checks if the first epoch (with index 0) should be skipped to calculate
        the steps per epoch. If the skip is needed, then the internal state
        of the scheduler object will not be changed.
        """
        self._should_skip = False
        if self._update_per_optimizer_step:
            if self._steps_per_epoch is None and self._steps_in_current_epoch > 0:
                self._steps_per_epoch = self._steps_in_current_epoch

            if self._steps_per_epoch is not None and self._steps_in_current_epoch > 0:
                if self._steps_per_epoch != self._steps_in_current_epoch:
                    raise Exception(
                        'Actual steps per epoch and steps per epoch from the scheduler '
                        'parameters are different. Scheduling may be incorrect.'
                    )

            if self._steps_per_epoch is None:
                self._should_skip = True
                logger.warning(
                    'Scheduler set to update sparsity level per optimizer step, '
                    'but steps_per_epoch was not set in config. Will only start updating '
                    'sparsity level after measuring the actual steps per epoch as signaled '
                    'by a .epoch_step() call.')
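
For context, a hedged sketch of the training-loop call pattern that this check relies on; scheduler, data_loader, num_epochs and train_step are placeholders, and .epoch_step() is the call mentioned in the warning above:

for epoch in range(num_epochs):
    scheduler.epoch_step()      # during epoch 0 the scheduler may only measure steps_per_epoch
    for batch in data_loader:
        train_step(batch)       # placeholder for the actual forward/backward/optimizer step
        scheduler.step()        # per-optimizer-step sparsity update once steps_per_epoch is known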
Example #7
def get_concat_axis(input_shapes: List[List[int]], output_shapes: List[List[int]]) -> int:
    """
    Returns the concatenation axis for the given input and output shapes of a concat node.

    :param input_shapes: Input shapes of the given concat node.
    :param output_shapes: Output shapes of the given concat node.
    :returns: Concatenation axis of the given concat node.
    """
    axis = None
    none_dim = None
    for idx, (dim_in, dim_out) in enumerate(zip(input_shapes[0], output_shapes[0])):
        if dim_in != dim_out:
            axis = idx
            break
        if dim_in is None:
            none_dim = idx

    if axis is None:
        if none_dim is None:
            axis = -1
            logger.warning('Identity concat node detected')
        else:
            axis = none_dim

    return axis
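
A minimal usage sketch with hypothetical shapes: for a concat of two NCHW tensors along the channel dimension, the first dimension that differs between the input and output shapes gives the axis.

input_shapes = [[1, 3, 32, 32], [1, 5, 32, 32]]
output_shapes = [[1, 8, 32, 32]]
axis = get_concat_axis(input_shapes, output_shapes)  # -> 1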
Example #8
    def parse_from_quantizer_setup(
        self, all_quantizations: Dict[QuantizerId, BaseQuantizer],
        quantizer_setup: QuantizerSetupBase,
        quantization_point_id_vs_quantizer_id: Dict[QuantizationPointId,
                                                    QuantizerId]):
        for group_idx, group in quantizer_setup.shared_input_operation_set_groups.items(
        ):
            act_quant_tuples = [
            ]  # type: List[Tuple[QuantizerId, BaseQuantizer]]
            wt_quant_tuples = [
            ]  # type: List[Tuple[QuantizerId, BaseQuantizer]]

            quantized_node_per_activation_qp_id = {
            }  # type: Dict[NNCFNodeName, QuantizationPointId]
            module_scope_per_weight_qp_id = {
            }  # type: Dict[NNCFNodeName, QuantizationPointId]

            for qp_id in group:
                qp = quantizer_setup.quantization_points[qp_id]
                quant_id = quantization_point_id_vs_quantizer_id[qp_id]
                quantizer_module = all_quantizations[quant_id]
                resulting_tuple = (quant_id, quantizer_module)
                if qp.is_weight_quantization_point():
                    wt_quant_tuples.append(resulting_tuple)
                    weight_quantized_module_node_name = qp.insertion_point.target_node_name
                    module_scope_per_weight_qp_id[
                        weight_quantized_module_node_name] = qp_id
                elif qp.is_activation_quantization_point():
                    act_quant_tuples.append(resulting_tuple)
                    quantized_node_names = qp.directly_quantized_operator_node_names
                    quantized_node_per_activation_qp_id.update({
                        node_name: qp_id
                        for node_name in quantized_node_names
                    })
                self._quantizer_per_group_id[quant_id] = group_idx

            for weight_quantized_module_node_name, w_qp_id in module_scope_per_weight_qp_id.items(
            ):
                if weight_quantized_module_node_name not in quantized_node_per_activation_qp_id:
                    nncf_logger.warning(
                        'Module `%s` has quantized weights and no quantized inputs!',
                        weight_quantized_module_node_name)
                    continue
                a_qp_id = quantized_node_per_activation_qp_id[
                    weight_quantized_module_node_name]
                if w_qp_id in self.weight_qp_id_per_activation_qp_id:
                    nncf_logger.warning(
                        'Multiple weight quantizers per activation quantizer for `%s`',
                        weight_quantized_module_node_name)
                    continue
                self.weight_qp_id_per_activation_qp_id[w_qp_id] = a_qp_id

            adj_quants = AdjacentQuantizers(act_quant_tuples, wt_quant_tuples)
            self._groups_of_adjacent_quantizers.append(adj_quants)
Example #9
 def get_model_size(self, per_quantizer_bw: Dict[QuantizerId,
                                                 int]) -> np.int64:
     model_size = 0
     for qid, nparam in self._nparam_map.items():
         if qid in per_quantizer_bw:
             model_size += nparam * per_quantizer_bw[qid]
         else:
             logger.warning(
                 "[ModelSizeCalculator] Missing Bitwidth of QID: {}, using {} bits"
                 .format(str(qid), ModelSizeCalculator.FLOAT_BITWIDTH))
             model_size += nparam * ModelSizeCalculator.FLOAT_BITWIDTH
     return model_size
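
A hedged usage sketch (the calculator instance and quantizer IDs are placeholders): quantizers missing from the bitwidth map are counted at ModelSizeCalculator.FLOAT_BITWIDTH.

per_quantizer_bw = {qid: 8 for qid in quantizer_ids}  # hypothetical: every quantized layer at 8 bit
model_size_bits = model_size_calculator.get_model_size(per_quantizer_bw)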
Example #10
 def _raise_overflow_fix_warning(self, applied_overflow_fix: bool):
     if applied_overflow_fix:
         if self._overflow_fix == 'enable':
             quantizers_with_overflow_fix_str = 'all weight quantizers'
         elif self._overflow_fix == 'first_layer_only':
             quantizers_with_overflow_fix_str = 'first convolution weight quantizers'
         logger.warning(
             'The overflow issue fix will be applied. '
             'Now {} will effectively use only 7 bits out of '
             '8 bits. This resolves the overflow issue on AVX2 and AVX-512 machines. '
             'Please take a look at the documentation for detailed information.'
             .format(quantizers_with_overflow_fix_str))
Example #11
    def find_node(
        self, op_address: OperationAddress, tensor_metas: List[TensorMeta],
        tm_comparators: List[TensorMetaComparator]
    ) -> Optional[DynamicGraphNode]:
        iter_scopes = op_address.scope_in_model.get_iteration_scopes()
        # compare meta information about first input nodes during the matching. During the iteration some nodes may
        # change number of inputs, e.g. on concat of hidden outputs
        input_matcher = FirstInputsMatcher()
        op_exec_context = OperationExecutionContext(
            op_address.operator_name,
            op_address.scope_in_model,
            op_address.call_order,
            tensor_metas,
            input_matcher=input_matcher,
            tm_comparators=tm_comparators)
        node_candidates = self._find_nodes_with_matching_context_and_inputs(
            op_exec_context)
        if not node_candidates:
            op_exec_context = OperationExecutionContext(
                op_address.operator_name,
                op_address.scope_in_model,
                op_address.call_order,
                tensor_metas,
                tm_comparators=tm_comparators)
            node_candidates = self._find_nodes_with_matching_context_among_inputless(
                op_exec_context)
            if not node_candidates and iter_scopes:
                # ignore information about node creator and index of input
                comparators = tm_comparators + [
                    ShapeOnlyTensorMetaComparator()
                ]
                op_exec_context = OperationExecutionContext(
                    op_address.operator_name,
                    op_address.scope_in_model,
                    op_address.call_order,
                    tensor_metas,
                    tm_comparators=comparators)
                # match with starting points of iteration
                iter_nodes = self._match_first_iteration_nodes(
                    op_exec_context, iter_scopes)
                for node_key, node in iter_nodes.items():
                    node_candidates[node_key] = node

        node_candidates = list(node_candidates.values())
        result = None
        if len(node_candidates) == 1:
            result = node_candidates[0]
        if len(node_candidates) > 1:
            nncf_logger.warning("More than one node matches input")
            result = node_candidates[0]

        return result
Example #12
    def _get_quantizer_propagation_solution(self, nncf_graph: NNCFGraph,
                                            quantizable_weighted_layer_nodes: List[QuantizableWeightedLayerNode],
                                            custom_layer_node_names: List[NNCFNodeName],
                                            model: tf.keras.Model) \
            -> SingleConfigQuantizerSetup:
        ip_graph = InsertionPointGraph(
            nncf_graph,
            [qn.node.node_name for qn in quantizable_weighted_layer_nodes])

        pattern = TF_HW_FUSED_PATTERNS.get_full_pattern_graph()
        ip_graph = ip_graph.get_ip_graph_with_merged_hw_optimized_operations(
            pattern)

        input_preprocessing_nodes = self._get_input_preprocessing_nodes(
            nncf_graph, model)
        input_preprocessing_node_names = [
            n.node_name for n in input_preprocessing_nodes
        ]
        if custom_layer_node_names:
            logger.warning(
                'Custom layers [{}] '
                'will be ignored during quantization since it is not yet supported in NNCF'
                .format(", ".join([str(l) for l in custom_layer_node_names])))
        ignored_scopes_for_solver = self.ignored_scopes_per_group[QuantizerGroup.ACTIVATIONS] + \
                                    input_preprocessing_node_names + custom_layer_node_names

        solver = QuantizerPropagationSolver(
            ignored_scopes=ignored_scopes_for_solver,
            target_scopes=self.target_scopes_per_group[
                QuantizerGroup.ACTIVATIONS],
            hw_config=self.hw_config,
            default_trait_to_metatype_map=DEFAULT_TF_QUANT_TRAIT_TO_OP_DICT,
            default_qconfig_list=[
                self._get_default_qconfig(self.global_quantizer_constraints[
                    QuantizerGroup.ACTIVATIONS])
            ],
            quantizable_layer_nodes=quantizable_weighted_layer_nodes,
            global_constraints=self.global_quantizer_constraints,
            quantize_outputs=self.quantize_outputs)

        quantization_proposal = solver.run_on_ip_graph(ip_graph)
        multi_config_setup = quantization_proposal.quantizer_setup
        single_config_setup = multi_config_setup.select_first_qconfig_for_each_point(
        )
        finalized_proposal = quantization_proposal.finalize(
            single_config_setup)
        final_setup = solver.get_final_quantizer_setup(finalized_proposal)
        final_setup = self._handle_quantize_inputs_option(
            final_setup, nncf_graph)

        return final_setup
Example #13
    def add_skipped_and_missing_keys(self,
                                     model_state_dict: Dict[str, torch.Tensor]):
        all_processed_keys = []
        optional_param_names = OPTIONAL_PARAMETERS_REGISTRY.get_parameters_names()
        params_to_skip = tuple('.' + name for name in optional_param_names)
        for keys in self._keys.values():
            all_processed_keys.extend(keys)

        for key in model_state_dict.keys():
            if key not in all_processed_keys:
                if key.endswith(params_to_skip) or key in optional_param_names:
                    self.add_key(key, ProcessedKeyStatus.SKIPPED)
                    nncf_logger.warning("The optional parameter {} is missed in the loaded state".format(key))
                else:
                    self.add_key(key, ProcessedKeyStatus.MISSING)
Example #14
 def visualize_graph(self, path: str):
     out_graph = self._get_graph_for_visualization()
     nx.drawing.nx_pydot.write_dot(out_graph, path)
     try:
         A = to_agraph(out_graph)
         A.layout('dot')
         png_path = os.path.splitext(path)[0] + '.png'
         A.draw(png_path)
     except ImportError:
         nncf_logger.warning(
             'Graphviz is not installed - only the .dot model visualization format will be used. '
             'Install pygraphviz into your Python environment and graphviz system-wide to enable '
             'PNG rendering.')
     except Exception:  #pylint:disable=broad-except
         nncf_logger.warning('Failed to render graph to PNG')
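
Typical usage (illustrative): passing a .dot path also produces a sibling .png when pygraphviz and system graphviz are available.

nncf_graph.visualize_graph('/tmp/original_graph.dot')  # also writes /tmp/original_graph.png if graphviz is installed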
Example #15
def load_module_state(module: Module,
                      state: _ModuleState,
                      strict=False) -> None:
    for ch in module.modules():
        try:
            ch.train(state.training_state[ch])
        except KeyError as err:
            # KeyError could happen if the module names were changed during forward
            # (e.g. LSTM block in NNCF examples)
            nncf_logger.warning(err)
            if strict:
                nncf_logger.error(err)
                return

    for p in module.parameters():
        p.requires_grad = state.requires_grad_state[p]
Example #16
    def load_state(self, state: Dict[str, Dict[str, Any]]) -> None:
        """
        Loads the compression controller state from the map of algorithm name to the dictionary with state attributes.

        :param state: map of the algorithm name to the dictionary with the corresponding state attributes.
        """
        if self.name in state:
            algo_state = state[self.name]
            if self._state_names.COMPRESSION_STAGE in state:
                if self.compression_stage() != state[
                        self._state_names.COMPRESSION_STAGE]:
                    nncf_logger.warning(
                        'Current CompressionStage ({}) of the compression controller does '
                        'not correspond to the value found in '
                        'the checkpoint ({})'.format(
                            self.compression_stage(),
                            state[self._state_names.COMPRESSION_STAGE]))
            self.loss.load_state(algo_state[self._state_names.LOSS])
            self.scheduler.load_state(algo_state[self._state_names.SCHEDULER])
Example #17
 def load_best_checkpoint(self, model):
     # load checkpoint with highest compression rate and positive acc budget
     possible_checkpoint_rates = [
         comp_rate
         for (comp_rate, acc_budget) in self._compressed_training_history
         if acc_budget >= 0
     ]
     if not possible_checkpoint_rates:
         nncf_logger.warning(
             'Could not produce a compressed model satisfying the set accuracy '
             'degradation criterion during training. Consider increasing the number '
             'of training epochs.')
     best_checkpoint_compression_rate = sorted(
         possible_checkpoint_rates)[-1]
     resuming_checkpoint_path = self._best_checkpoints[
         best_checkpoint_compression_rate]
     nncf_logger.info('Loading the best checkpoint found during training '
                      '{}...'.format(resuming_checkpoint_path))
     model.load_weights(resuming_checkpoint_path)
Example #18
 def load_best_checkpoint(self, model):
     # load checkpoint with highest compression rate and positive acc budget
     possible_checkpoint_rates = self.get_compression_rates_with_positive_acc_budget(
     )
     if not possible_checkpoint_rates:
         nncf_logger.warning(
             'Could not produce a compressed model satisfying the set accuracy '
             'degradation criterion during training. Consider increasing the number '
             'of training epochs.')
     best_checkpoint_compression_rate = sorted(
         possible_checkpoint_rates)[-1]
     resuming_checkpoint_path = self._best_checkpoints[
         best_checkpoint_compression_rate]
     nncf_logger.info('Loading the best checkpoint found during training '
                      '{}...'.format(resuming_checkpoint_path))
     resuming_checkpoint = torch.load(resuming_checkpoint_path,
                                      map_location='cpu')
     resuming_model_state_dict = resuming_checkpoint.get(
         'state_dict', resuming_checkpoint)
     load_state(model, resuming_model_state_dict, is_resume=True)
Example #19
 def _handle_frozen_layers(self, target_model: NNCFNetwork):
     scopes_of_frozen_layers = []
     for weighted_node in target_model.get_weighted_original_graph_nodes():
         if not weighted_node.layer_attributes.weight_requires_grad:
             if self._should_consider_scope(weighted_node.node_name):
                 scopes_of_frozen_layers.append(weighted_node.node_name)
     scopes_to_print = '\n'.join(scopes_of_frozen_layers)
     if len(scopes_of_frozen_layers) > 0:
         is_allowed, reason = self._are_frozen_layers_allowed()
         if is_allowed:
             nncf_logger.warning(
                 '{}, compressing them without tuning weights.\n'
                 'Frozen layers:\n'
                 '{}'.format(reason, scopes_to_print))
         else:
             raise RuntimeError(
                 f'{reason}.\n'
                 f'Please unfreeze them or put into the Ignored Scope.\n'
                 f'Frozen Layers:\n'
                 f'{scopes_to_print}')
Example #20
    def find_node(
            self, op_address: OperationAddress, tensor_metas: List[TensorMeta],
            tm_comparators: List[TensorMetaComparator]) -> DynamicGraphNode:
        op_exec_context = OperationExecutionContext(
            op_address.operator_name,
            op_address.scope_in_model,
            op_address.call_order,
            tensor_metas,
            tm_comparators=tm_comparators)
        node_candidates = self._find_nodes_with_matching_context_and_inputs(
            op_exec_context)
        if not node_candidates:
            node_candidates = self._find_nodes_with_matching_context_among_inputless(
                op_exec_context)

        node_candidates = list(node_candidates.values())
        result = None
        if len(node_candidates) == 1:
            result = node_candidates[0]
        if len(node_candidates) > 1:
            nncf_logger.warning("More than one node matches input")
            result = node_candidates[0]

        return result
Example #21
    def _get_quantized_nodes_for_output(self,
                                        nxmodel,
                                        insertion_points,
                                        node_name,
                                        quantized_nodes_for_output=None):
        if quantized_nodes_for_output is None:
            if node_name in insertion_points:
                return [node_name]
            quantized_nodes_for_output = []

        for predecessor in nxmodel.predecessors(node_name):
            if nxmodel.out_degree(predecessor) > 1:
                logger.warning(
                    'Removing of FakeQuantize after layer {} '
                    'with multiple outputs is not fully supported'.format(
                        predecessor))
            if nxmodel.nodes[predecessor][
                    'type'] in LAYERS_AGNOSTIC_TO_DATA_PRECISION:
                self._get_quantized_nodes_for_output(
                    nxmodel, insertion_points, predecessor,
                    quantized_nodes_for_output)
            elif predecessor in insertion_points:
                quantized_nodes_for_output.append(predecessor)
        return quantized_nodes_for_output
Example #22
    def wrap_inputs(self, model_args, model_kwargs):
        bound_model_params = self._fwd_signature.bind(*model_args,
                                                      **model_kwargs)
        for param_name in self._fwd_params_to_input_infos_odict:
            param_kind = self._fwd_signature.parameters[param_name].kind
            if param_kind is Parameter.VAR_POSITIONAL or param_kind is Parameter.VAR_KEYWORD:
                nncf_logger.warning(
                    "An input_info tensor was bound to a *args or **kwargs variadic parameter in the"
                    "forward's signature! This is currently unsupported by NNCF. Input compression may "
                    "be incorrect.")
                # Currently won't support input info mapping to *args or **kwargs-mapped parameters
                continue

            if param_name not in bound_model_params.arguments:
                nncf_logger.warning(
                    "A call to a compressed model's forward occured without one of the params"
                    "specified in input_infos! Input compression may be incorrect. Trying to recover "
                    "by wrapping the default value for the parameter.")
                bound_model_params.apply_defaults()

            potential_tensor = bound_model_params.arguments[param_name]
            if potential_tensor is not None:
                bound_model_params.arguments[param_name] = nncf_model_input(
                    bound_model_params.arguments[param_name])
            else:
                # Default was None - cannot wrap as-is. Will wrap a dummy tensor as specified in
                # input infos - will conserve the call order of nncf_model_input nodes,
                # and the post-hooks for the input node will execute. The result won't go anywhere, though.
                nncf_logger.warning(
                    "Wrapping a dummy tensor for input {}".format(param_name))
                info_for_missing_input = self._fwd_params_to_input_infos_odict[
                    param_name]
                device = 'cuda'
                if self._module_ref_for_device is not None:
                    device = next(
                        self._module_ref_for_device.parameters()).device
                dummy_tensor = create_mock_tensor(info_for_missing_input,
                                                  device)
                _ = nncf_model_input(dummy_tensor)

        return bound_model_params.args, bound_model_params.kwargs
Example #23
def create_compressed_model(model: Module,
                            config: NNCFConfig,
                            compression_state: Optional[Dict[str, Any]] = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            wrap_outputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            dump_graphs=True) \
        -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param compression_state: representation of the entire compression state to unambiguously restore
    the compressed model. Includes builder and controller states.
    :param dummy_forward_fn: if supplied, will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object. The dummy_forward_fn code MUST contain calls to nncf.nncf_model_input
    functions made with each compressed model input tensor in the underlying model's args/kwargs tuple, and these
    calls should be exactly the same as in the wrap_inputs_fn function code (see below); if dummy_forward_fn is
    specified, then wrap_inputs_fn also must be specified.
    :param wrap_inputs_fn: if supplied, will be used on the module's input arguments during a regular, non-dummy
    forward call before passing the inputs to the underlying compressed model. This is required if the model's input
    tensors that are important for compression are not supplied as arguments to the model's forward call directly, but
    instead are located in a container (such as list), and the model receives the container as an argument.
    wrap_inputs_fn should take as input two arguments - the tuple of positional arguments to the underlying
    model's forward call, and a dict of keyword arguments to the same. The function should wrap each tensor among the
    supplied model's args and kwargs that is important for compression (e.g. quantization) with an nncf.nncf_model_input
    function, which is a no-operation function and marks the tensors as inputs to be traced by NNCF in the internal
    graph representation. Output is the tuple of (args, kwargs), where args and kwargs are the same as were supplied in
    input, but with each compression-relevant tensor in the original input wrapped by nncf.nncf_model_input. Must
    be specified if dummy_forward_fn is specified.
    :param dump_graphs: Whether to dump the internal graph representations of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    if dummy_forward_fn is not None and wrap_inputs_fn is None:
        raise ValueError(
            "A custom dummy forward function was specified, but the corresponding input wrapping function "
            "was not. In case a custom dummy forward function is specified for purposes of NNCF graph "
            "building, then the wrap_inputs_fn parameter MUST also be specified and be consistent with "
            "the input wrapping done in dummy_forward_fn.")

    is_legacy_model_state_dict = compression_state is not None and \
                                 BaseController.BUILDER_STATE not in compression_state and \
                                 BaseController.CONTROLLER_STATE not in compression_state
    maybe_convert_legacy_names_in_compress_state(compression_state)
    # Compress model that will be deployed for the inference on target device. No need to compress parts of the
    # model that are used on training stage only (e.g. AuxLogits of Inception-v3 model) or unused modules with weights.
    # As a consequence, no need to care about spoiling BN statistics, as they are disabled in eval mode.
    model.eval()

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "original_graph.dot"))

    set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    original_model_accuracy = None
    if is_accuracy_aware_training(config):
        if config.has_extra_struct(ModelEvaluationArgs):
            evaluation_args = config.get_extra_struct(ModelEvaluationArgs)
            with torch.no_grad():
                original_model_accuracy = evaluation_args.eval_fn(model)
                nncf_logger.info("Non-compressed model accuracy = {}".format(
                    original_model_accuracy))

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        wrap_inputs_fn=wrap_inputs_fn,
        wrap_outputs_fn=wrap_outputs_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching,
        original_model_accuracy=original_model_accuracy)

    should_init = compression_state is None

    builder = create_compression_algorithm_builder(config, should_init)
    is_state_loadable = not is_legacy_model_state_dict and compression_state is not None
    if is_state_loadable:
        builder.load_state(compression_state[BaseController.BUILDER_STATE])

    builder.apply_to(compressed_model)
    compression_ctrl = builder.build_controller(compressed_model)
    if is_state_loadable:
        compression_ctrl.load_state(
            compression_state[BaseController.CONTROLLER_STATE])

    # Required to ensure that the model leaving create_compressed_model has correct compressed graph.
    # In particular, this is currently required for correct functioning of RNNs.
    compressed_model.rebuild_graph()

    try:
        if is_legacy_model_state_dict:
            from nncf.torch import load_state
            state_dict_to_load = compression_state.get('state_dict',
                                                       compression_state)
            load_state(compressed_model, state_dict_to_load, is_resume=True)
    finally:
        if dump_graphs and is_main_process():
            compressed_model_graph = compressed_model.get_graph()
            compressed_model_graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "compressed_graph.dot"))

    # Synchronize all processes if run in distributed mode
    if is_dist_avail_and_initialized():
        try:
            barrier()
        # An exception can be raised while running barrier
        # if the backend is not in the supported list: https://pytorch.org/docs/stable/distributed.html
        except RuntimeError as err:
            nncf_logger.warning(err)
            nncf_logger.warning(
                "NNCF continues work, while does not guarantee that "
                "the processes will finish model's compression at the same time. "
                "If your training pipeline demands the processes be synchronized, please, "
                "keep attention to that error")
            return compression_ctrl, compressed_model
    compressed_model.get_tracing_context().disable_trace_dynamic_graph()
    return compression_ctrl, compressed_model
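
As an illustration of the wrap_inputs_fn contract described in the docstring above, here is a minimal sketch for a model called as model(x, targets=None) where only x matters for compression; the import path of nncf_model_input may differ between NNCF versions and is an assumption here.

from nncf.torch import nncf_model_input  # import path is an assumption, see the docstring above


def my_wrap_inputs_fn(args, kwargs):
    # Mark the first positional tensor as an NNCF-traced model input; leave everything else untouched.
    wrapped_args = (nncf_model_input(args[0]),) + tuple(args[1:])
    return wrapped_args, kwargs


# compression_ctrl, compressed_model = create_compressed_model(
#     model, nncf_config, wrap_inputs_fn=my_wrap_inputs_fn)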
Example #24
    def _collect_custom_layer_infos(
            self,
            model: tf.keras.Model,
            use_graph_var_names: bool = False) -> Dict[str, CustomLayerInfo]:
        custom_layers = TFModelConverter.get_custom_layers(model)
        retval = {}
        for layer_name, layer in custom_layers.items():
            layer_input_spec = [tf.TensorSpec.from_tensor(tensor)
                                for tensor in layer.input] if isinstance(layer.input, list) \
                else tf.TensorSpec.from_tensor(layer.input)

            # TODO (vshampor) : Use the custom layer's inbound_nodes/outbound_nodes to determine what edges
            #  should connect it to the rest of the graph. Currently the custom layer
            #  subgraph will be present in the NNCFGraph after conversion, which is useful
            #  for purposes of weight modification target point creation and usage,
            #  but the subgraph won't be connected to the rest of the graph; in the main graph
            #  component, the custom layer will still be represented by a single node
            concr_fn = tf.function(layer).get_concrete_function(
                layer_input_spec, training=False)
            wrapped_function = convert_variables_to_constants_v2(
                concr_fn, lower_control_flow=False)

            graphdef_nodes = wrapped_function.graph.as_graph_def().node
            graphdef_name_to_layer_var_map = {} if use_graph_var_names else \
                TFModelConverter._get_graphdef_name_to_layer_var_map(concr_fn)
            nodes = {
                graphdef_name_to_layer_var_map.get(node.name, node.name): node
                for node in graphdef_nodes
            }
            graphdef_node_name_vs_node = {
                node.name: node
                for node in graphdef_nodes
            }

            custom_layer_info = CustomLayerInfo()
            for pretty_node_name, node in nodes.items():
                custom_layer_info.graphdef_node_name_to_pretty_node_name[
                    node.name] = pretty_node_name

            for pretty_node_name, node in nodes.items():
                weight_node_name = None
                metatype = get_op_metatype(node.op)
                if metatype in WEIGHTABLE_TF_OP_METATYPES:
                    graphdef_weight_node_name = self._get_graphdef_node_name_for_custom_layer_node_weight(
                        node, graphdef_node_name_vs_node)
                    if graphdef_weight_node_name in graphdef_name_to_layer_var_map:
                        weight_node_name = graphdef_name_to_layer_var_map[
                            graphdef_weight_node_name]
                    else:
                        nncf_logger.warning(
                            'Could not associate a weighted custom layer node {} '
                            'with a weight attribute of the custom layer - the corresponding weight '
                            'will not be compressed! Make sure that the corresponding custom layer '
                            'weight has a name.'.format(pretty_node_name))

                custom_layer_info.node_infos[
                    pretty_node_name] = CustomLayerNodeInfo(
                        graphdef_node_name=node.name,
                        custom_layer_name=layer_name,
                        target_node_name=pretty_node_name,
                        node_type=node.op,
                        node_metatype=get_op_metatype(node.op),
                        weight_node_name=weight_node_name,
                        dtype=Dtype.FLOAT
                        if node.attr['dtype'].type == 1 else Dtype.INTEGER)

                custom_layer_info.shared_weight_node_names_vs_weighted_op_node_names[
                    weight_node_name].add(pretty_node_name)

                for idx, input_graphdef_node_name_and_output_port_str in enumerate(
                        node.input):
                    if '^' in input_graphdef_node_name_and_output_port_str:
                        continue  # Skip control_inputs
                    splits = input_graphdef_node_name_and_output_port_str.split(
                        ':')
                    if len(splits) == 1:
                        input_graphdef_node_name = splits[0]
                        output_port_id = 0
                    elif len(splits) == 2:
                        input_graphdef_node_name = splits[0]
                        output_port_id = int(splits[1])
                    else:
                        raise RuntimeError(
                            "Could not parse NodeDef's input field!")

                    pretty_input_node_name = \
                        custom_layer_info.graphdef_node_name_to_pretty_node_name[input_graphdef_node_name]

                    # TODO (vshampor): add proper tensor_shape, will probably involve
                    #                  running as_graph_def(add_shapes=True)
                    custom_layer_info.edge_infos[(pretty_input_node_name, pretty_node_name)] = \
                        CustomLayerEdgeInfo(tensor_shape=None,
                                            input_port_id=idx,
                                            output_port_id=output_port_id,
                                            dtype=custom_layer_info.node_infos[pretty_node_name].dtype)
                retval[layer_name] = custom_layer_info
        return retval
Example #25
def patch_extension_build_function():
    """
    The function patches PyTorch to fix a bug in CUDA extension building;
    the bug is expected to be fixed in PyTorch 1.8.0.
    """
    try:
        torch_version_numbers = torch.__version__.split('+', maxsplit=1)[0]
        split_torch_version = list(map(int, torch_version_numbers.split('.')))
    except ValueError as e:
        logger.warning(
            'Skip applying a patch to building extension with a reason: '
            'Cannot parse a PyTorch version with the error {}'.format(e))
        return

    if split_torch_version < [1, 8, 0]:
        if torch.__version__ not in ('1.5.1', '1.7.0', '1.7.1'):
            logger.warning(
                'Skip applying a patch to building extension with a reason: '
                'PyTorch version is not supported for this')
            return

        def sort_arch_flags(func):
            def wrapped(*args, **kwargs):
                flags = func(*args, **kwargs)
                return sorted(flags)

            return wrapped

        # pylint:disable=protected-access
        torch.utils.cpp_extension._get_cuda_arch_flags = \
            sort_arch_flags(torch.utils.cpp_extension._get_cuda_arch_flags)

    else:
        import re
        import sys
        from pathlib import Path

        # A hackish backport of the https://github.com/pytorch/pytorch/pull/56015 fix.
        def remove_nvcc_dep_build(func):
            def wrapped(*args, **kwargs):
                func(*args, **kwargs)
                if len(args) > 0:
                    target_ninja_file_path = args[0]
                else:
                    target_ninja_file_path = kwargs['path']
                with safe_open(Path(target_ninja_file_path),
                               'r') as ninja_build_file:
                    ninja_file_contents = ninja_build_file.read()
                with safe_open(Path(target_ninja_file_path),
                               'w') as ninja_build_file:
                    ninja_build_file.write(
                        re.sub(
                            r'--generate-dependencies-with-compile --dependency-output \$out\.d',
                            '', ninja_file_contents))

            return wrapped

        if sys.platform != 'win32':
            # pylint:disable=protected-access
            torch.utils.cpp_extension._write_ninja_file = \
                remove_nvcc_dep_build(torch.utils.cpp_extension._write_ninja_file)
Example #26
    def apply_init(self) -> SingleConfigQuantizerSetup:
        from nncf.torch.automl.environment.quantization_env import QuantizationEnv
        from nncf.torch.automl.agent.ddpg.ddpg import DDPG
        from nncf.common.utils.debug import DEBUG_LOG_DIR

        if self._dump_autoq_data or is_debug():
            dump_dir = self._init_args.config.get('log_dir', None)
            if dump_dir is None:
                dump_dir = DEBUG_LOG_DIR
            self.dump_dir = Path(dump_dir) / Path("autoq") / Path(
                "autoq_agent_dump")
            self.dump_dir.mkdir(parents=True, exist_ok=True)

            self.policy_dict = OrderedDict()  #key: episode
            self.best_policy_dict = OrderedDict()  #key: episode

            self._init_args.config['episodic_nncfcfg'] = str(
                self.dump_dir / "episodic_nncfcfg")
            os.makedirs(self._init_args.config['episodic_nncfcfg'],
                        exist_ok=True)

            try:
                from torch.utils.tensorboard import SummaryWriter
                self.tb_writer = SummaryWriter(self.dump_dir)
                # log compression config to tensorboard
                self.tb_writer.add_text(
                    'AutoQ/run_config',
                    json.dumps(self._init_args.config['compression'],
                               indent=4,
                               sort_keys=False).replace("\n", "\n\n"), 0)
            except ModuleNotFoundError:
                logger.warning(
                    "Tensorboard installation not found! Install tensorboard Python package "
                    "in order for AutoQ tensorboard statistics data to be dumped"
                )

        start_ts = datetime.now()

        from nncf.torch.automl.environment.quantization_env import QuantizationEnvParams
        env_params = QuantizationEnvParams(
            compression_ratio=self._params.compression_ratio,
            eval_subset_ratio=self._params.eval_subset_ratio,
            skip_constraint=self._params.skip_constraint,
            performant_bw=True,
            finetune=self._params.finetune,
            bits=self._params.bits,
            dump_init_precision_data=self._dump_autoq_data,
            log_dir=Path(DEBUG_LOG_DIR) / Path("autoq"))

        # Instantiate Quantization Environment
        env = QuantizationEnv(self._model,
                              self.quantization_controller,
                              self._hw_precision_constraints,
                              self._init_args.data_loader,
                              self._init_args.eval_fn,
                              hw_config_type=self._hw_cfg_type,
                              params=env_params)

        nb_state = len(env.state_list)
        nb_action = 1

        # Control buffer length at run manager level
        if "warmup_iter_number" not in self._ddpg_hparams_override:
            self._ddpg_hparams_override["warmup_iter_number"] = 10

        self._ddpg_hparams_override["rmsize"] = \
            self._ddpg_hparams_override["warmup_iter_number"] * (len(env.master_df)+1)

        # Instantiate Automation Agent
        agent = DDPG(nb_state,
                     nb_action,
                     self._iter_number,
                     hparam_override=self._ddpg_hparams_override)

        if self._dump_autoq_data and self.tb_writer is not None:
            # Need to replace '|' in nodestr (QuantizerId/QuantizerPointId)
            # to '+' as it is a special character in markdown
            temp_df = deepcopy(env.master_df[env.state_list + ['n_op']])
            temp_df["modified_nodestr"] = list(
                map(lambda x: x.replace("|", "+"), temp_df.index.tolist()))
            temp_df = temp_df.set_index("modified_nodestr").reset_index()
            self.tb_writer.add_text('AutoQ/state_embedding',
                                    temp_df.to_markdown())

        best_policy, best_reward = self._search(agent, env)

        end_ts = datetime.now()

        final_qid_vs_qconfig_map = env.select_config_for_actions(best_policy)

        final_quantizer_setup = self.quantization_controller.get_quantizer_setup_for_current_state(
        )
        for qp_id, qconf in final_qid_vs_qconfig_map.items():
            final_quantizer_setup.quantization_points[qp_id].qconfig = qconf

        str_bw = [
            str(element)
            for element in self.get_bitwidth_per_scope(final_quantizer_setup)
        ]
        logger.info('\n'.join(
            ['[AutoQ]\n\"bitwidth_per_scope\": [', ',\n'.join(str_bw), ']']))
        logger.info('[AutoQ] best_reward: {}'.format(best_reward))
        logger.info('[AutoQ] best_policy: {}'.format(best_policy))
        logger.info("[AutoQ] Search Complete")
        logger.info(
            "[AutoQ] Elapsed time of AutoQ Precision Initialization (): {}".
            format(end_ts - start_ts))
        return final_quantizer_setup
Example #27
def extract_range_init_params(
        config: NNCFConfig) -> Optional[Dict[str, object]]:
    """
    Extracts parameters of the quantization range initialization algorithm from the
    compression algorithm section of the NNCFConfig.

    :param config: An instance of the NNCFConfig.
    :return: Parameters of the quantization range initialization algorithm.
    """
    algo_config = extract_algo_specific_config(config, 'quantization')
    init_range_config_dict_or_list = algo_config.get('initializer',
                                                     {}).get('range', {})

    range_init_args = None
    try:
        range_init_args = config.get_extra_struct(QuantizationRangeInitArgs)
    except KeyError:
        if not init_range_config_dict_or_list:
            logger.warning(
                'Initializer section not specified for quantization algorithm in NNCF config and '
                'quantization init args not supplied - the necessary parameters are not specified '
                'to run the quantizer range initialization algorithm')
            return None

    if not init_range_config_dict_or_list:
        logger.warning(
            'Enabling quantization range initialization with default parameters.'
        )
        init_range_config_dict_or_list = {'num_init_samples': 256}

    max_num_init_samples = 0
    global_range_init_config = None
    scope_overrides = []  # type: List[PerLayerRangeInitConfig]
    if isinstance(init_range_config_dict_or_list, dict):
        global_range_init_config = RangeInitConfig.from_dict(
            init_range_config_dict_or_list)
        max_num_init_samples = global_range_init_config.num_init_samples
    else:
        for sub_init_range_config_dict in init_range_config_dict_or_list:
            scope_overrides.append(
                PerLayerRangeInitConfig.from_dict(sub_init_range_config_dict))
            max_num_init_samples_config = max(scope_overrides,
                                              key=lambda x: x.num_init_samples)
            max_num_init_samples = max_num_init_samples_config.num_init_samples

    if max_num_init_samples == 0:
        return None
    if range_init_args is None:
        raise ValueError(
            'Should run range initialization as specified via config, '
            'but the initializing data loader is not provided as an extra struct. '
            'Refer to `NNCFConfig.register_extra_structs` and the `QuantizationRangeInitArgs` class'
        )

    params = {
        'init_range_data_loader': range_init_args.data_loader,
        'device': range_init_args.device,
        'global_init_config': global_range_init_config,
        'per_layer_range_init_configs': scope_overrides
    }

    return params
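
For reference, a hedged sketch of the two 'initializer'/'range' layouts this function parses; 'num_init_samples' appears in the code above, while the overall schema may differ between NNCF versions.

config_fragment = {
    "compression": {
        "algorithm": "quantization",
        "initializer": {
            # Variant 1: a single global range-init config
            "range": {"num_init_samples": 256},
            # Variant 2 (instead of the dict above): a list of PerLayerRangeInitConfig-style
            # dicts, each carrying at least "num_init_samples" plus layer-scoping fields.
        },
    },
}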
Example #28
    def _get_quantizer_setup(self,
                             model: tf.keras.Model) -> TFQuantizationSetup:
        converter = TFModelConverterFactory.create(model)
        nncf_graph = converter.convert()
        nodes = nncf_graph.get_all_nodes()
        for node in nodes:
            if node.metatype in NOT_SUPPORT_LAYER_METATYPES:
                logger.warning(
                    'The layer {} is not supported by the quantization algorithm'
                    .format(
                        get_original_name_and_instance_idx(node.node_name)[0]))

        quantizable_weighted_layer_nodes = self._get_quantizable_weighted_layer_nodes(
            nncf_graph)
        custom_layer_nodes = self._get_custom_layer_node_names(
            nncf_graph, converter)

        quantizer_setup = self._get_quantizer_propagation_solution(
            nncf_graph, quantizable_weighted_layer_nodes, custom_layer_nodes,
            model)
        setup = TFQuantizationSetup()

        quantized_layer_names_vs_qconfigs = {
        }  # type: Dict[str, QuantizerConfig]
        qp_id_to_index = {}  # type: Dict[QuantizationPointId, int]
        tf_setup_qp_index = 0
        applied_overflow_fix = False
        first_conv_nodes = get_first_nodes_of_type(nncf_graph, ['Conv2D'])
        for qp_id, qp in quantizer_setup.quantization_points.items():
            if qp.is_weight_quantization_point():
                target_node = nncf_graph.get_node_by_name(
                    qp.insertion_point.target_node_name)
                is_custom, layer_info = converter.get_layer_info_for_node(
                    target_node.node_name)
                if is_custom:
                    raise RuntimeError(
                        "Quantizing custom layer weights is currently unsupported!"
                    )
                layer_name = layer_info.layer_name
                qconfig = qp.qconfig
                if layer_name in quantized_layer_names_vs_qconfigs:
                    assigned_qconfig = quantized_layer_names_vs_qconfigs[
                        layer_name]
                    if qconfig != assigned_qconfig:
                        raise RuntimeError(
                            f"Inconsistent quantizer configurations selected by solver for one and the "
                            f"same quantizable layer! Tried to assign {qconfig} to {layer_name} as "
                            f"specified by QP {qp_id}, but the layer already has quantizer "
                            f"config {assigned_qconfig} assigned to it!")
                    continue  # The layer has already been quantized
                quantized_layer_names_vs_qconfigs[layer_name] = qconfig
                metatype = target_node.metatype
                assert issubclass(metatype, TFLayerWithWeightsMetatype)
                for weight_def in metatype.weight_definitions:
                    op_name = self._get_quantizer_operation_name(
                        target_node.node_name, weight_def.weight_attr_name)
                    self._op_names.append(op_name)

                    half_range = self._get_half_range(qconfig, target_node,
                                                      first_conv_nodes)
                    applied_overflow_fix = applied_overflow_fix or half_range
                    quantizer_spec = TFQuantizerSpec.from_config(
                        qconfig,
                        narrow_range=not half_range,
                        half_range=half_range)
                    target_point = TFLayerWeight(layer_info.layer_name,
                                                 weight_def.weight_attr_name)
                    qpoint = TFQuantizationPoint(op_name, quantizer_spec,
                                                 target_point)
            else:
                assert qp.is_activation_quantization_point()
                ip = qp.insertion_point
                assert isinstance(ip, ActivationQuantizationInsertionPoint)
                target_node_name = ip.target_node_name
                input_port_id = ip.input_port_id
                fake_quantize_name = self._get_fake_quantize_name(
                    target_node_name, input_port_id)
                quantizer_spec = TFQuantizerSpec.from_config(
                    qp.qconfig, narrow_range=False, half_range=False)
                fake_quantize_layer = FakeQuantize(quantizer_spec,
                                                   name=fake_quantize_name)
                self._op_names.append(fake_quantize_layer.op_name)

                is_custom, layer_info = converter.get_layer_info_for_node(
                    target_node_name)
                if is_custom:
                    raise RuntimeError(
                        "Quantizing custom layer activations is currently unsupported!"
                    )
                if input_port_id is not None:
                    target_point = TFBeforeLayer(
                        layer_info.layer_name,
                        instance_idx=layer_info.instance_idx,
                        input_port_id=input_port_id)
                else:
                    target_point = TFAfterLayer(
                        layer_info.layer_name,
                        instance_idx=layer_info.instance_idx,
                        output_port_id=0)
                qpoint = TFQuantizationPoint(fake_quantize_name,
                                             quantizer_spec, target_point)

            setup.add_quantization_point(qpoint)
            qp_id_to_index[qp_id] = tf_setup_qp_index
            tf_setup_qp_index += 1

        setup = self._generate_unified_scale_groups(model, quantizer_setup,
                                                    qp_id_to_index, setup)

        self._raise_overflow_fix_warning(applied_overflow_fix)

        return setup
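One detail worth calling out in the weight-quantization branch above is the per-layer bookkeeping: several quantization points may refer to the same layer, so the setup keeps exactly one quantizer config per layer name and raises when the solver proposes conflicting configs for it. The following is a minimal sketch of just that bookkeeping, using a hypothetical QConfig stand-in rather than NNCF's own class.

from typing import Dict, NamedTuple


class QConfig(NamedTuple):
    # Hypothetical simplified quantizer config for illustration.
    num_bits: int
    per_channel: bool = False


def assign_layer_qconfig(assigned: Dict[str, QConfig],
                         layer_name: str,
                         qconfig: QConfig) -> bool:
    """Return True if the layer still needs its quantizers built,
    False if it was already handled; raise on conflicting configs."""
    if layer_name in assigned:
        if assigned[layer_name] != qconfig:
            raise RuntimeError(
                f"Conflicting configs for {layer_name}: "
                f"{assigned[layer_name]} vs {qconfig}")
        return False  # already quantized with the same config
    assigned[layer_name] = qconfig
    return True


table: Dict[str, QConfig] = {}
assert assign_layer_qconfig(table, 'conv1', QConfig(8)) is True
assert assign_layer_qconfig(table, 'conv1', QConfig(8)) is False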
    def run(self) -> Dict[str, torch.Tensor]:
        """
        :return: the model state dict with matched parameters
        """
        normalized_model_keys = NormalizedKeys(list(self.model_state_dict.keys()),
                                               keys_to_ignore=self.ignored_keys)
        normalized_keys_to_load = NormalizedKeys(list(self.state_dict_to_load.keys()),
                                                 keys_to_ignore=self.ignored_keys)

        has_version_agnostic_names = False
        cross_match_key_map = self._cross_match_version_agnostic_names(list(normalized_keys_to_load),
                                                                       list(normalized_model_keys))

        for matched_checkpoint_key, matched_model_key in cross_match_key_map.items():
            if matched_checkpoint_key != matched_model_key:
                has_version_agnostic_names = True

        if has_version_agnostic_names:
            warnings.warn('Legacy NNCF-enabled .pth checkpoint has been loaded! '
                          'The version-agnostic `RELU` operator name entries in the state dict have been deprecated. '
                          'The loader will try to match these entries to the corresponding `relu` and `relu_` op '
                          'names. Newly exported checkpoints will use the new format.',
                          category=DeprecationWarning)

        if normalized_keys_to_load.has_legacy_storage_keys:
            warnings.warn('Legacy NNCF-enabled .pth checkpoint has been loaded! '
                          'The "activation_quantizers" storage key is replaced with '
                          '"external_quantizers" in newer versions of NNCF, and support '
                          'for the legacy storage key will be dropped in a future release. '
                          'This checkpoint will be loaded; to update it to the new format, save this model\'s '
                          'checkpoint again.', category=DeprecationWarning)

        if normalized_model_keys.is_unified_group_detected and not normalized_keys_to_load.is_unified_group_detected:
            warnings.warn('Unified parameters are detected in the compressed model, but all parameters are independent '
                          'and separate in the checkpoint being loaded. Each unified parameter will be initialized from '
                          'one of the corresponding separate parameters in the checkpoint. This may slightly degrade '
                          'accuracy, but avoids having to restart compression training from scratch with unified '
                          'params.', category=DeprecationWarning)
        ignored_keys = normalized_model_keys.ignored_orig_keys + normalized_keys_to_load.ignored_orig_keys
        self._processed_keys.extend_keys(ignored_keys, ProcessedKeyStatus.SKIPPED)
        if ignored_keys:
            ignored_keys_str = '\n'.join(set(ignored_keys))
            nncf_logger.warning("The following parameters were skipped while matching the checkpoint keys:\n{}"
                                .format(ignored_keys_str))

        for normalized_key_to_load in normalized_keys_to_load:
            key_to_load = normalized_keys_to_load.get_orig_key(normalized_key_to_load)
            normalized_key_to_load = cross_match_key_map.get(normalized_key_to_load,
                                                             normalized_key_to_load)
            if normalized_key_to_load in normalized_model_keys:
                model_key = normalized_model_keys.get_orig_key(normalized_key_to_load)
                value_to_load = self.state_dict_to_load[key_to_load]
                size_of_value_to_load = value_to_load.size()
                size_of_model_value = self.model_state_dict[model_key].size()
                if size_of_value_to_load == size_of_model_value:
                    self._new_dict[model_key] = value_to_load
                    self._processed_keys.add_key(model_key, ProcessedKeyStatus.MATCHED)
                else:
                    nncf_logger.warning("Size mismatch for '{}': {} in the checkpoint to load vs {} in the model"
                                        .format(model_key, size_of_value_to_load, size_of_model_value))
                    self._processed_keys.add_key(model_key, ProcessedKeyStatus.SIZE_MISMATCHED)
            else:
                self._processed_keys.add_key(key_to_load, ProcessedKeyStatus.UNEXPECTED)
        self._processed_keys.add_skipped_and_missing_keys(self.model_state_dict)
        return self._new_dict
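Stripped of key normalization and legacy-name handling, run() is essentially a shape-checked merge of two state dicts: keys present in both with equal sizes are copied, size mismatches and unexpected keys are reported, and the rest count as missing. Below is a minimal sketch of that core flow against plain PyTorch state dicts; the match_state_dicts helper is illustrative only, not NNCF's API.

import torch


def match_state_dicts(model_sd: dict, sd_to_load: dict) -> dict:
    matched = {}
    for key, model_value in model_sd.items():
        if key not in sd_to_load:
            print(f"missing: {key}")
            continue
        value = sd_to_load[key]
        if value.size() != model_value.size():
            print(f"size mismatch for {key}: {value.size()} vs {model_value.size()}")
            continue
        matched[key] = value
    # Keys present in the checkpoint but absent from the model are unexpected.
    for key in sd_to_load.keys() - model_sd.keys():
        print(f"unexpected: {key}")
    return matched


model_sd = {'w': torch.zeros(2, 2), 'b': torch.zeros(2)}
ckpt_sd = {'w': torch.ones(2, 2), 'b': torch.ones(3), 'extra': torch.ones(1)}
print(list(match_state_dicts(model_sd, ckpt_sd)))  # only 'w' is matched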
Exemple #30
    def __init__(self, model: NNCFNetwork,
                 quantization_controller: ExperimentalQuantizationController,
                 hw_precision_constraints: HardwareQuantizationConstraints,
                 eval_loader: torch.utils.data.DataLoader,
                 eval_fn: Callable[[nn.Module, torch.utils.data.DataLoader],
                                   float], hw_config_type: HWConfigType,
                 params: QuantizationEnvParams):

        logger.info("[Q.Env] Instantiating NNCF Quantization Environment")
        self.qctrl = quantization_controller
        self.qmodel = model
        self.eval_loader = eval_loader
        self.eval_fn = eval_fn
        self._hw_precision_constraints = hw_precision_constraints
        self._bn_adaptation = None

        self.model_name = self.qmodel.nncf_module.__class__.__name__

        # Check and only proceed if target device is supported by Q.Env
        self.hw_cfg_type = hw_config_type
        assert self.hw_cfg_type in [None, HWConfigType.VPU]

        # Set target compression ratio
        self.compression_ratio = params.compression_ratio

        self.eval_loader = PartialDataLoader(
            self.eval_loader, iter_ratio=params.eval_subset_ratio)

        # Bool to disable hard resource constraint
        self.skip_constraint = params.skip_constraint

        # Bool to enable bw alignment of adj. Q group to lower precision
        self.performant_bw = params.performant_bw

        # Bool to enable fine-tuning in each episode. Placeholder for now
        self.finetune = False

        # Counter for number of evaluate_strategy calls
        self._n_eval = 0

        # Configure search space for precision according to target device
        if self.hw_cfg_type is None:
            self.model_bitwidth_space = params.bits
        elif self.hw_cfg_type is HWConfigType.VPU:
            self.model_bitwidth_space = self._hw_precision_constraints.get_all_unique_bitwidths(
            )
        self.model_bitwidth_space = sorted(list(self.model_bitwidth_space))

        # Create mapping of QuantizerId to the space of the corresponding quantizer's allowed qconfigs
        #pylint:disable=line-too-long
        self.qconfig_space_map = OrderedDict.fromkeys(
            self.qctrl.all_quantizations.keys(
            ))  # type: Dict[QuantizerId, List[QuantizerConfig]]
        if self.hw_cfg_type is None:
            for qid in self.qconfig_space_map.keys():
                conf = self.qctrl.all_quantizations[qid].get_quantizer_config()
                conf_list_to_set = []
                for bit in self.model_bitwidth_space:
                    bit_adjusted_conf = deepcopy(conf)
                    bit_adjusted_conf.num_bits = bit
                    conf_list_to_set.append(bit_adjusted_conf)
                self.qconfig_space_map[qid] = conf_list_to_set
        else:
            for qid in self.qconfig_space_map:
                conf_list_to_set = []
                bw_vs_qconfigs_dict = self._hw_precision_constraints.get_bitwidth_vs_qconfigs_dict(
                    qid)
                for bitwidth, qconf_list in bw_vs_qconfigs_dict.items():
                    target_qconf = qconf_list[0]
                    if len(qconf_list) > 1:
                        logger.warning(
                            "Received multiple quantizer configurations {qc_lst} for the same bitwidth {bw} "
                            "for quantizer {q} - AutoQ can currently only choose among bitwidths, but not "
                            "among configurations sharing the same bitwidth. Selecting {qc} "
                            "as the target configuration for bitwidth {bw}".
                            format(qc_lst=";".join(
                                [str(qconf) for qconf in qconf_list]),
                                   bw=bitwidth,
                                   q=str(qid),
                                   qc=str(target_qconf)))
                    conf_list_to_set.append(target_qconf)

                self.qconfig_space_map[qid] = conf_list_to_set

        # Quantizer Master Table Creation
        self.groups_of_adjacent_quantizers = self.qctrl._groups_of_adjacent_quantizers
        self.quantizer_table = self._create_quantizer_table()

        # Create master dataframe to keep track of quantizable layers and their attributes
        self.master_df, self.state_list = self._get_state_space(
            self.qctrl, self.qmodel, self.quantizer_table)
        if self.master_df.isnull().values.any():
            raise ValueError("Q.Env Master Dataframe has null value(s)")

        assert len(self.quantizer_table) == len(self.qctrl.all_quantizations), \
            "Number of quantizers in the quantizer table does not match the quantization controller"

        # MinMaxScaler for State Embedding
        self.state_scaler = MinMaxScaler()
        self.state_scaler.fit(self.master_df[self.state_list])

        # Mapping required for quantizer BW alignment flow
        self.adjq_groupwise_intersecting_bw_space = self._create_map_of_adjq_groupid_to_common_bw_space(
        )
        self.adjq_groupwise_df_lut_keys = self._create_map_of_adjq_groupid_to_df_lut_keys(
        )

        # Model Size Calculation
        self.model_size_calculator = ModelSizeCalculator(
            self.qmodel, self.qconfig_space_map)
        self.orig_model_size = self.model_size_calculator.fp_model_size
        self.min_model_size = self.model_size_calculator.min_model_size
        self.max_model_size = self.model_size_calculator.max_model_size
        self.target_model_size = self.orig_model_size * self.compression_ratio

        if self.target_model_size < self.min_model_size or self.target_model_size > self.max_model_size:
            raise ValueError(
                "Model size ratio {} is out of bounds ({}, {})".format(
                    self.compression_ratio,
                    self.min_model_size / self.orig_model_size,
                    self.max_model_size / self.orig_model_size))

        # Compression Ratio Calculation (BOP relative to 8-bit)
        self.compression_ratio_calculator = CompressionRatioCalculator(
            self.qmodel.get_flops_per_module(),
            self.qctrl.get_quantizer_setup_for_current_state(), self.qctrl.
            groups_of_adjacent_quantizers.weight_qp_id_per_activation_qp_id)

        # Evaluate and store metric score of pretrained model
        self._evaluate_pretrained_model()
        self.qmodel_init_sd = deepcopy(self.qmodel.state_dict())

        self.reset()

        self._dump_autoq_data = params.dump_init_precision_data
        if self._dump_autoq_data or is_debug():
            dump_dir = params.log_dir
            if dump_dir is None:
                dump_dir = DEBUG_LOG_DIR
            self.dump_dir = Path(dump_dir) / Path("autoq_env_dump")
            self.dump_dir.mkdir(parents=True, exist_ok=True)
            # Serialize Q.Env information. Note that these functions should be at the end of Q.Env Initialization.
            self._dump_master_df()
            self._dump_quantized_graph()
            self._dump_groups_of_adjacent_quantizers()
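The target-size check near the start of the model-size section compares orig_model_size * compression_ratio against the bounds given by the smallest and largest achievable model sizes; the ratio is invalid when the target falls below the minimum or above the maximum (hence the `or` condition, as corrected above). A self-contained sketch of that check with made-up numbers:

def check_target_size(orig_size: float, min_size: float, max_size: float,
                      compression_ratio: float) -> float:
    # The target cannot be both below the minimum and above the maximum,
    # so the out-of-bounds condition must use `or`.
    target = orig_size * compression_ratio
    if target < min_size or target > max_size:
        raise ValueError(
            "Model size ratio {} is out of bounds ({:.3f}, {:.3f})".format(
                compression_ratio, min_size / orig_size, max_size / orig_size))
    return target


# E.g. a hypothetical 100 MB FP32 model whose fully quantized size is 25 MB:
print(check_target_size(100.0, 25.0, 100.0, 0.5))  # prints 50.0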