def forward(self, x):
    if is_debug():
        self.call_count += 1
    if self.init_stage:
        return x
    self.set_level_ranges()
    return self.quantize(x)
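This forward pass does bookkeeping only when NNCF's debug mode is active and acts as an identity while the quantizer is still in its initialization (range-collection) stage. A minimal, self-contained sketch of the same gating pattern follows; the module-level _DEBUG flag and ToyQuantizer are illustrative stand-ins, not NNCF code:

import torch
import torch.nn as nn

_DEBUG = False  # hypothetical stand-in for NNCF's global debug flag


def is_debug():
    return _DEBUG


class ToyQuantizer(nn.Module):
    """Minimal sketch of the pattern above: debug-only bookkeeping plus an init stage."""

    def __init__(self, step=0.1):
        super().__init__()
        self.call_count = 0      # maintained only while debugging
        self.init_stage = True   # during range collection, act as identity
        self.step = step

    def forward(self, x):
        if is_debug():
            self.call_count += 1
        if self.init_stage:
            return x
        return torch.round(x / self.step) * self.step  # toy quantization


q = ToyQuantizer()
x = torch.randn(4)
assert torch.equal(q(x), x)   # init stage: inputs pass through untouched
q.init_stage = False
_DEBUG = True                 # now forward calls are counted
q(x)
assert q.call_count == 1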
Example #2
    def wrapped(*args, **kwargs):
        ctx = get_current_context()
        if not ctx or getattr(ctx, 'in_operator', False) or not ctx.is_tracing:
            op1 = operator(*args, **kwargs)
            return op1

        ctx.in_operator = True

        if operator_info.custom_trace_fn is not None:
            result = operator_info.custom_trace_fn(operator, *args, **kwargs)
        else:
            ia_op_exec_context = ctx.get_caller_context(operator_info.name)
            ctx.register_operator_call(ia_op_exec_context.operator_name, ia_op_exec_context.scope_in_model)

            op_input = OperatorInput(list(args), kwargs)
            processed_input = ctx.execute_pre_hooks(ia_op_exec_context, op_input)
            args = tuple(processed_input.op_args)
            kwargs = processed_input.op_kwargs
            fargs = flatten_args(args, kwargs)

            node = ctx.find_operator_node(fargs, ia_op_exec_context)
            if is_debug():
                ctx.register_node_call(ctx.graph.get_node_key_by_id(node.node_id))

            result = operator(*args, **kwargs)

            result = trace_tensors(result, node)
            result = ctx.execute_post_hooks(ia_op_exec_context, result)

        ctx.in_operator = False
        return result
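The wrapper above patches an operator so that, while tracing is active, each top-level call registers itself with the current context, runs pre- and post-hooks, and is guarded against re-entrant calls via the in_operator flag. A stripped-down sketch of the same idea, using only hypothetical stand-ins for the context machinery, is shown below; unlike this excerpt, it resets the guard in a finally block (the library handles the exception case separately in Example #9):

import functools


class ToyTraceContext:
    """Hypothetical stand-in for the tracing context used above (illustration only)."""

    def __init__(self):
        self.is_tracing = True
        self.in_operator = False
        self.calls = []


_CTX = ToyTraceContext()


def get_current_context():
    return _CTX


def traced(op_name):
    """Wrap a callable so each top-level call is recorded while tracing is active."""
    def decorator(operator):
        @functools.wraps(operator)
        def wrapped(*args, **kwargs):
            ctx = get_current_context()
            if not ctx or ctx.in_operator or not ctx.is_tracing:
                return operator(*args, **kwargs)   # nested or untraced call: pass through
            ctx.in_operator = True                 # re-entrancy guard
            try:
                ctx.calls.append(op_name)          # minimal analogue of register_operator_call
                return operator(*args, **kwargs)
            finally:
                ctx.in_operator = False            # always restore the guard
        return wrapped
    return decorator


@traced("add")
def add(a, b):
    return a + b


print(add(1, 2), _CTX.calls)   # 3 ['add']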
Example #3
    def __enter__(self):
        global _CURRENT_CONTEXT
        self._save_context = _CURRENT_CONTEXT
        _CURRENT_CONTEXT = self
        self._init_thread_local()
        if is_debug():
            self.reset_node_call_counters()

        return self
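__enter__ saves whatever context was active and installs this one as the global current context, so contexts can nest. The matching __exit__ is not part of this excerpt; a toy sketch of the full save/restore pair (illustrative only, not the NNCF class) could look like this:

_CURRENT_CONTEXT = None


class ToyContext:
    """Toy mirror of the save/restore pattern above (illustrative, not the NNCF class)."""

    def __enter__(self):
        global _CURRENT_CONTEXT
        self._save_context = _CURRENT_CONTEXT
        _CURRENT_CONTEXT = self
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore whatever context was active before entering, even if the body raised.
        global _CURRENT_CONTEXT
        _CURRENT_CONTEXT = self._save_context


with ToyContext() as outer:
    assert _CURRENT_CONTEXT is outer
    with ToyContext() as inner:        # contexts nest; the inner one shadows the outer
        assert _CURRENT_CONTEXT is inner
    assert _CURRENT_CONTEXT is outer   # restored on exit
assert _CURRENT_CONTEXT is None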
Example #4
    def __init__(self,
                 module,
                 input_infos: List[ModelInputInfo] = None,
                 dummy_forward_fn=None,
                 scopes_without_shape_matching=None,
                 ignored_scopes=None,
                 target_scopes=None):
        super().__init__()
        self.set_nncf_wrapped_model(module)
        self.input_infos = input_infos
        self.ignored_scopes = ignored_scopes
        self.target_scopes = target_scopes
        self._dummy_forward_fn = dummy_forward_fn
        self._nncf_module_scopes = []  # type: List[Scope]
        self.scopes_without_shape_matching = scopes_without_shape_matching
        self.debug_interface = CombinedDebugInterface() if is_debug() else None
        self._extra_module_types = []  # type: List[CompressionModuleType]
        # pylint:disable=line-too-long
        self._insertions_into_original_graph = {
        }  # type: Dict[InsertionPoint, List[Tuple[Callable, OperationPriority]]]

        device = next(module.parameters()).device

        # all modules should be replaced prior to graph building
        self._replace_modules_by_nncf_modules(device)

        _orig_context = TracingContext()
        _orig_graph_build_forward_fn = self._get_dummy_forward_fn_for_graph_building(
            with_input_tracing=True)

        self._graph_builder = GraphBuilder(_orig_graph_build_forward_fn)

        _orig_context.add_node_comparators([MODEL_INPUT_OP_NAME],
                                           ShapeIgnoringTensorMetaComparator())
        if self.scopes_without_shape_matching:
            _orig_context.add_node_comparators(
                scopes_without_shape_matching,
                ShapeIgnoringTensorMetaComparator())

        self._original_graph = self._graph_builder.build_graph(
            self.get_nncf_wrapped_model(), _orig_context)

        self._compressed_context = TracingContext()

        self._dummy_forward_fn = self._get_dummy_forward_fn_for_graph_building(
            with_input_tracing=False)

        self._compressed_context.add_node_comparators(
            [MODEL_INPUT_OP_NAME], ShapeIgnoringTensorMetaComparator())
        if self.scopes_without_shape_matching:
            self._compressed_context.add_node_comparators(
                scopes_without_shape_matching,
                ShapeIgnoringTensorMetaComparator())
        self._load_listener = None

        self._builders = []  # type: List['CompressionAlgorithmBuilder']
Example #5
def context(name):
    ctx = get_context(name)
    ctx.enter()
    if is_debug():
        ctx.reset_node_call_counters()
    try:
        yield ctx
    finally:
        ctx.reset_scope_operator_call_counters()
        ctx.leave()
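The yield inside try/finally is the shape used with contextlib.contextmanager: setup runs before the yield, the body of the with block runs at the yield point, and the finally clause always resets the per-scope counters, even if the body raises. (The decorator itself is presumably applied where this helper is defined.) A self-contained toy with a hypothetical registry illustrates the same setup/teardown discipline:

from contextlib import contextmanager


@contextmanager
def scoped_counters(registry, name):
    """Toy analogue: set up per-scope state, yield it, and always clean up in finally."""
    state = registry.setdefault(name, {"calls": 0})
    try:
        yield state                # the body of the with block runs here
    finally:
        state["calls"] = 0         # reset counters no matter how the block exits


registry = {}
with scoped_counters(registry, "conv1") as ctx:
    ctx["calls"] += 1
print(registry)   # {'conv1': {'calls': 0}} -- reset by the finally clause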
Example #6
    def forward(self, x):
        if is_debug():
            self.call_count += 1
        # TODO: refactor to get rid of extra if's and calls on each forward
        if self.init_stage:
            return x
        self.set_level_ranges()
        if is_tracing_state():
            return self.run_export_quantization(x)

        return self.quantize(x)
Example #7
    def apply_init(self):
        original_device = next(self._model.parameters()).device
        self._model.to(self._init_device)

        traces_per_layer = self._calc_traces(self._criterion,
                                             self._iter_number,
                                             self._tolerance)
        if not traces_per_layer:
            raise RuntimeError('Failed to calculate hessian traces!')

        num_weights = len(self._ordered_weight_quantizations)
        bits_configurations = self.get_configs_constrained_by_order(
            self._bits, num_weights)
        ordered_weight_quantization_ids = list(
            self._ordered_weight_quantizations.keys())
        bits_configurations = self._filter_configs_by_precision_constraints(
            bits_configurations, self._hw_precision_constraints,
            ordered_weight_quantization_ids,
            traces_per_layer.get_order_of_traces())
        if not bits_configurations:
            raise RuntimeError(
                'All bits configurations are incompatible with HW Config!')

        perturbations, weight_observers = self.calc_quantization_noise()

        configuration_metric = self.calc_hawq_metric_per_configuration(
            bits_configurations, perturbations, traces_per_layer,
            self._init_device)

        chosen_config_per_layer = self.choose_configuration(
            configuration_metric, bits_configurations,
            traces_per_layer.get_order_of_traces())
        self.set_chosen_config(chosen_config_per_layer)
        ordered_metric_per_layer = self.get_metric_per_layer(
            chosen_config_per_layer, perturbations, traces_per_layer)
        if is_debug():
            hawq_debugger = HAWQDebugger(bits_configurations, perturbations,
                                         weight_observers, traces_per_layer,
                                         self._bits)
            hawq_debugger.dump_metric(configuration_metric)
            hawq_debugger.dump_avg_traces()
            hawq_debugger.dump_density_of_quantization_noise()
            hawq_debugger.dump_perturbations_ratio()
            hawq_debugger.dump_bitwidth_graph(self._algo, self._model)

        self._model.rebuild_graph()
        str_bw = [str(element) for element in self.get_bitwidth_per_scope()]
        nncf_logger.info('\n'.join(
            ['\n\"bitwidth_per_scope\": [', ',\n'.join(str_bw), ']']))

        self._model.to(original_device)
        return ordered_metric_per_layer
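The selection step above scores every candidate bitwidth configuration by combining per-layer Hessian traces with the measured quantization perturbations and keeps the best-scoring one. A toy, made-up-numbers sketch of that HAWQ-style metric (sum over layers of trace times perturbation, lowest score wins) is shown below; it is a schematic of the idea, not the library's calc_hawq_metric_per_configuration:

avg_traces = [0.9, 0.3, 0.1]                    # per-layer average Hessian traces (toy values)
perturbation = {8: 0.001, 4: 0.02, 2: 0.3}      # quantization noise per bitwidth (toy values)

bits_configurations = [(8, 8, 8), (8, 4, 4), (4, 4, 2)]


def hawq_metric(config):
    # HAWQ-style score: sum over layers of avg. Hessian trace times quantization perturbation.
    return sum(trace * perturbation[bits] for trace, bits in zip(avg_traces, config))


chosen = min(bits_configurations, key=hawq_metric)
print(chosen, hawq_metric(chosen))   # (8, 8, 8) has the lowest perturbation-weighted score here

In the excerpt, this scoring runs only after the candidate configurations have been filtered by hardware precision constraints, so the unconstrained minimum shown in the toy would not necessarily survive.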
Example #8
    def apply_init(self):
        disabled_gradients = self.disable_quantizer_gradients(
            self._all_quantizers_per_scope,
            self._algo.quantized_weight_modules_registry, self._model)

        traces_per_layer = self._calc_traces(self._criterion,
                                             self._iter_number,
                                             self._tolerance)
        if not traces_per_layer:
            raise RuntimeError('Failed to calculate hessian traces!')

        self.enable_quantizer_gradients(self._model,
                                        self._all_quantizers_per_scope,
                                        disabled_gradients)

        num_weights = len(self._ordered_weight_quantizations)
        bits_configurations = self.get_configs_constrained_by_order(
            self._bits, num_weights)
        ordered_weight_quantization_ids = list(
            self._ordered_weight_quantizations.keys())
        bits_configurations = self.filter_configs_by_precision_constraints(
            bits_configurations, self._hw_precision_constraints,
            ordered_weight_quantization_ids,
            traces_per_layer.get_order_of_traces())
        if not bits_configurations:
            raise RuntimeError(
                'All bits configurations are incompatible with HW Config!')

        perturbations, weight_observers = self.calc_quantization_noise()

        configuration_metric = self.calc_hawq_metric_per_configuration(
            bits_configurations, perturbations, traces_per_layer, self._device)

        chosen_config_per_layer = self.choose_configuration(
            configuration_metric, bits_configurations,
            traces_per_layer.get_order_of_traces())
        self.set_chosen_config(chosen_config_per_layer)
        ordered_metric_per_layer = self.get_metric_per_layer(
            chosen_config_per_layer, perturbations, traces_per_layer)
        if is_debug():
            self.HAWQDump(bits_configurations, configuration_metric,
                          perturbations, weight_observers, traces_per_layer,
                          self._bits).run()

        self._model.rebuild_graph()
        str_bw = [str(element) for element in self.get_bitwidth_per_scope()]
        nncf_logger.info('\n'.join(
            ['\n\"bitwidth_per_scope\": [', ',\n'.join(str_bw), ']']))

        return ordered_metric_per_layer
Example #9
    def wrapped(*args, **kwargs):
        ctx = get_current_context()
        if not ctx or getattr(ctx, 'in_operator', False) or not ctx.is_tracing:
            op1 = operator(*args, **kwargs)
            return op1

        ctx.in_operator = True

        if operator_info.custom_trace_fn is not None:
            try:
                result = operator_info.custom_trace_fn(operator, *args, **kwargs)
            except:
                # Looks like the __repr__ call made during IDE debug to display tensor contents does not exit properly,
                # but instead throws an exception. This try...except block handles such a situation.
                # Otherwise the context is stuck in the "in_operator == True" state.
                ctx.in_operator = False
                raise
        else:
            ia_op_exec_context = ctx.get_caller_context(operator_info.name)
            ctx.register_operator_call(ia_op_exec_context.operator_name, ia_op_exec_context.scope_in_model)

            op_input = OperatorInput(list(args), kwargs)
            processed_input = ctx.execute_pre_hooks(ia_op_exec_context, op_input)
            args = tuple(processed_input.op_args)
            kwargs = processed_input.op_kwargs
            fargs = flatten_args(args, kwargs)

            node = ctx.find_operator_node(fargs, ia_op_exec_context)
            if is_debug():
                ctx.register_node_call(ctx.graph.get_node_key_by_id(node.node_id))

            result = operator(*args, **kwargs)

            result = trace_tensors(result, node)
            result = ctx.execute_post_hooks(ia_op_exec_context, result)

        ctx.in_operator = False
        return result
Example #10
    def run(self, criterion: _Loss, iter_number=200, tolerance=1e-5):
        disabled_gradients = self.disable_quantizer_gradients(self._all_quantizations,
                                                              self._algo.quantized_weight_modules_registry, self._model)

        traces_per_layer = self._calc_traces(criterion, iter_number, tolerance)

        self.enable_quantizer_gradients(self._model, self._all_quantizations, disabled_gradients)

        num_weights = len(self._weights_to_init)
        bits_configurations = self.get_constrained_configs(self._bits, num_weights)

        perturbations, weight_observers = self.calc_quantization_noise()

        configuration_metric = self.calc_hawq_metric_per_configuration(bits_configurations, perturbations,
                                                                       traces_per_layer, self._device)

        chosen_config_per_layer = self.choose_configuration(configuration_metric, bits_configurations,
                                                            traces_per_layer.get_order_of_traces())
        self.set_chosen_config(chosen_config_per_layer)
        ordered_metric_per_layer = self.get_metric_per_layer(chosen_config_per_layer, perturbations, traces_per_layer)
        if is_debug():
            self.HAWQDump(bits_configurations, configuration_metric, perturbations,
                          weight_observers, traces_per_layer, self._bits).run()
        return ordered_metric_per_layer
Example #11
    def apply_init(self) -> SingleConfigQuantizerSetup:
        from nncf.automl.environment.quantization_env import QuantizationEnv
        from nncf.automl.agent.ddpg.ddpg import DDPG
        from nncf.debug import DEBUG_LOG_DIR

        if self._dump_autoq_data or is_debug():
            dump_dir = self._init_args.config.get('log_dir', None)
            if dump_dir is None:
                dump_dir = DEBUG_LOG_DIR
            self.dump_dir = Path(dump_dir) / Path("autoq_agent_dump")
            self.dump_dir.mkdir(parents=True, exist_ok=True)

            self.policy_dict = OrderedDict() #key: episode
            self.best_policy_dict = OrderedDict() #key: episode

            self._init_args.config['episodic_nncfcfg'] = self.dump_dir / "episodic_nncfcfg"
            os.makedirs(self._init_args.config['episodic_nncfcfg'], exist_ok=True)

            try:
                from torch.utils.tensorboard import SummaryWriter
                self.tb_writer = SummaryWriter(self.dump_dir)
                # log compression config to tensorboard
                self.tb_writer.add_text('AutoQ/run_config',
                                         json.dumps(self._init_args.config['compression'],
                                         indent=4, sort_keys=False).replace("\n", "\n\n"), 0)
            except ModuleNotFoundError:
                logger.warning("Tensorboard installation not found! Install tensorboard Python package "
                               "in order for AutoQ tensorboard statistics data to be dumped")

        start_ts = datetime.now()

        from nncf.automl.environment.quantization_env import QuantizationEnvParams
        env_params = QuantizationEnvParams(compression_ratio=self._params.compression_ratio,
            eval_subset_ratio=self._params.eval_subset_ratio,
            skip_constraint=self._params.skip_constraint,
            finetune=self._params.finetune,
            bits=self._params.bits,
            dump_init_precision_data=self._dump_autoq_data,
            log_dir=Path(DEBUG_LOG_DIR) / Path("autoq"))

        # Instantiate Quantization Environment
        env = QuantizationEnv(
            self._model,
            self.quantization_controller,
            self._hw_precision_constraints,
            self._init_args.data_loader,
            self._init_args.eval_fn,
            hw_config_type=self._hw_cfg_type,
            params=env_params)

        nb_state = len(env.state_list)
        nb_action = 1

        # Instantiate Automation Agent
        agent = DDPG(nb_state, nb_action, self._iter_number, hparam_override=self._ddpg_hparams_override)

        if self._dump_autoq_data and self.tb_writer is not None:
            self.tb_writer.add_text('AutoQ/state_embedding', env.master_df[env.state_list].to_markdown())

        best_policy, best_reward = self._search(agent, env)

        end_ts = datetime.now()

        final_qid_vs_qconfig_map = env.select_config_for_actions(best_policy)

        final_quantizer_setup = self.quantization_controller.get_quantizer_setup_for_current_state()
        for qp_id, qconf in final_qid_vs_qconfig_map.items():
            final_quantizer_setup.quantization_points[qp_id].qconfig = qconf

        logger.info('[AutoQ] best_reward: {}'.format(best_reward))
        logger.info('[AutoQ] best_policy: {}'.format(best_policy))
        logger.info("[AutoQ] Search Complete")
        logger.info("[AutoQ] Elapsed time of AutoQ Precision Initialization (): {}".format(end_ts-start_ts))
        return final_quantizer_setup
Example #12
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            dump_graphs=True,) \
    -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
    building.
    :param dummy_forward_fn: will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object.
    :param dump_graphs: Whether to also dump the internal graph representations of the
    original and compressed models in .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.dump_graph(osp.join(config.get("log_dir", "."),
                                      "original_graph.dot"),
                             extended=True)

    if is_debug():
        set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(
        config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    if dump_graphs and is_main_process() and compression_algo_builder_list:
        if dummy_forward_fn is None:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=False))
        else:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=dummy_forward_fn)

        graph = compressed_graph_builder.build_graph(
            compressed_model, compressed_model.get_tracing_context())
        graph.dump_graph(osp.join(config.get("log_dir", "."),
                                  "compressed_graph.dot"),
                         extended=True)

    if resuming_state_dict is not None:
        load_state(compressed_model, resuming_state_dict, is_resume=True)

    return compression_ctrl, compressed_model
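A hypothetical usage sketch of this entry point follows; the model, config file name, and config contents are assumptions for illustration and are not part of the excerpt, and the top-level import path reflects older NNCF releases where this function lives at the package root:

import torch
from torchvision.models import resnet18

from nncf import NNCFConfig, create_compressed_model

model = resnet18()
# Assumes nncf_config.json describes the input sample size and a
# "compression" section, e.g. {"algorithm": "quantization"}.
nncf_config = NNCFConfig.from_json("nncf_config.json")

compression_ctrl, compressed_model = create_compressed_model(model, nncf_config)

# compressed_model is an NNCFNetwork and can be fine-tuned like any nn.Module;
# compression_ctrl drives algorithm-specific logic (statistics, scheduling, export).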
Example #13
    def __init__(self,
                 module,
                 quantize_module_creator_fn,
                 input_infos=None,
                 dummy_forward_fn=None,
                 ignored_scopes=None,
                 target_scopes=None,
                 quantize_inputs=True,
                 quantize_outputs=False,
                 quantizable_subgraph_patterns=None,
                 scopes_without_shape_matching=None,
                 disable_function_quantization_hooks=False):
        super().__init__()
        self.set_nncf_wrapped_module(module)
        self.quantize_inputs = quantize_inputs
        self.quantize_outputs = quantize_outputs
        self.input_infos = input_infos
        self.ignored_scopes = ignored_scopes
        self.target_scopes = target_scopes
        self.activation_quantizers = nn.ModuleDict()
        self.function_quantizers = nn.ModuleDict()
        self.quantized_weight_modules = OrderedDict()
        self.quantized_activation_modules = OrderedDict()
        self.quantize_module_creator_fn = quantize_module_creator_fn
        self.quantizable_subgraph_patterns = quantizable_subgraph_patterns
        self._dummy_forward_fn = dummy_forward_fn
        self._nncf_module_scopes = []  # type: List[Scope]
        self.debug_interface = QuantizationDebugInterface() if is_debug() else None
        self.scopes_without_shape_matching = scopes_without_shape_matching

        device = next(module.parameters()).device

        self.all_quantizations = OrderedDict()
        self._processed_input_agnostic_op_exec_contexts = set()
        self._processed_function_quantizers = set()

        # all modules should be replaced prior to graph building
        self._replace_quantized_modules_by_nncf_modules(device)
        self._register_weight_quantization_operations(device)

        if self._dummy_forward_fn is None:
            self._dummy_forward_fn = create_dummy_forward_fn(self.input_infos)

        self._graph_builder = GraphBuilder(
            custom_forward_fn=self._dummy_forward_fn)

        self._context_name = "orig"
        if self.scopes_without_shape_matching:
            get_context(self._context_name).add_node_comparators(
                scopes_without_shape_matching,
                ShapeIgnoringTensorMetaComparator())

        self._original_graph = self._graph_builder.build_graph(
            self, self._context_name)

        self._context_name = "quantized_graphs"
        self._ctx = get_context("quantized_graphs")
        if self.scopes_without_shape_matching:
            get_context(self._context_name).add_node_comparators(
                scopes_without_shape_matching,
                ShapeIgnoringTensorMetaComparator())

        self._register_activation_quantization_hooks(device)
        if self.quantize_inputs:
            self._register_input_quantization_operations(device)

        if not disable_function_quantization_hooks:
            self._register_function_quantization_hooks(device)

        quantization_types = [
            class_type.__name__
            for class_type in QUANTIZATION_MODULES.registry_dict.values()
        ]
        self.all_quantizations = get_state_dict_names_with_modules(
            self, quantization_types)
        self.load_listener = LoadStateListener(self, self.all_quantizations)
        if self.debug_interface is not None:
            self.debug_interface.init_actual(self.all_quantizations.keys(),
                                             self.activation_quantizers.keys(),
                                             self.function_quantizers.keys())
Example #14
    def apply_init(self):
        if not self._quantizers_handler.get_weight_quantizers_in_execution_order_per_id():
            return None
        original_device = next(self._model.parameters()).device
        self._model.to(self._init_device)

        traces_per_layer = self._calc_traces(self._criterion_fn,
                                             self._criterion,
                                             self._iter_number,
                                             self._tolerance)
        if not traces_per_layer:
            raise RuntimeError('Failed to calculate hessian traces!')

        traces_order = traces_per_layer.traces_order
        num_weights = len(self._weight_quantizations_by_execution_order)
        bits_configurations = self.get_configs_constrained_by_traces_order(
            self._bits, num_weights)

        weight_quantizer_ids_in_execution_order = list(
            self._weight_quantizations_by_execution_order.keys())

        if self._bitwidth_assignment_mode == BitwidthAssignmentMode.STRICT:
            self._merge_constraints_for_adjacent_quantizers(
                self._groups_of_adjacent_quantizers,
                self._hw_precision_constraints)

        bits_configurations = self._filter_configs_by_precision_constraints(
            bits_configurations, self._hw_precision_constraints,
            weight_quantizer_ids_in_execution_order, traces_order)
        if not bits_configurations:
            warnings.warn(
                'All bits configurations are incompatible with HW Config!',
                RuntimeWarning)
            return None

        if self._bitwidth_assignment_mode == BitwidthAssignmentMode.STRICT:
            bits_configurations = \
                self._filter_configs_by_grouped_weight_quantizers(bits_configurations,
                                                                  weight_quantizer_ids_in_execution_order,
                                                                  self._groups_of_adjacent_quantizers,
                                                                  traces_order)
        if not bits_configurations:
            warnings.warn(
                'No bits configurations are left after removing inconsistent groups of weight quantizers'
                ' with adjacent activation quantizers!', RuntimeWarning)
            return None

        flops_bits_per_config = self.get_flops_bits_per_config(
            bits_configurations, traces_order)
        min_ratio = min(flops_bits_per_config)
        max_ratio = max(flops_bits_per_config)
        if not min_ratio <= self._compression_ratio <= max_ratio:
            raise AttributeError(
                'Invalid compression ratio={}. Should be within range [{:.3f}, {:.3f}]'
                .format(self._compression_ratio, min_ratio, max_ratio))

        perturbations, weight_observers = self.calc_quantization_noise()

        configuration_metric = self.calc_hawq_metric_per_configuration(
            bits_configurations, perturbations, traces_per_layer,
            self._init_device)

        config_index = self.choose_configuration(configuration_metric,
                                                 flops_bits_per_config)
        chosen_config_in_traces_order = bits_configurations[config_index]
        chosen_config_in_execution_order = traces_order.get_execution_order_config(
            chosen_config_in_traces_order)
        nncf_logger.info(
            'Chosen HAWQ configuration with ratio={:.2f}, bitwidth per weightable layer={}'
            .format(flops_bits_per_config[config_index],
                    chosen_config_in_execution_order))
        nncf_logger.debug(
            'Order of the weightable layers in the HAWQ configuration (in descending order of average '
            'Hessian traces) ={}'.format(traces_order))

        self.set_chosen_config(chosen_config_in_execution_order)
        self._model.rebuild_graph()
        if is_debug() or self._dump_hawq_data:
            hawq_debugger = HAWQDebugger(bits_configurations, perturbations,
                                         weight_observers, traces_per_layer,
                                         self._bits)
            hawq_debugger.dump_metric_MB(configuration_metric)
            hawq_debugger.dump_metric_flops(configuration_metric,
                                            flops_bits_per_config,
                                            config_index)
            hawq_debugger.dump_avg_traces()
            hawq_debugger.dump_density_of_quantization_noise()
            hawq_debugger.dump_perturbations_ratio()
            hawq_debugger.dump_bitwidth_graph(
                self._algo, self._model, self._groups_of_adjacent_quantizers)
        str_bw = [str(element) for element in self.get_bitwidth_per_scope()]
        nncf_logger.info('\n'.join(
            ['\n\"bitwidth_per_scope\": [', ',\n'.join(str_bw), ']']))

        self._model.to(original_device)

        ordered_metric_per_layer = self.get_metric_per_layer(
            chosen_config_in_execution_order, perturbations, traces_per_layer)
        return ordered_metric_per_layer
Example #15
    def __init__(self,
                 module,
                 input_infos: List[ModelInputInfo],
                 dummy_forward_fn=None,
                 wrap_inputs_fn=None,
                 scopes_without_shape_matching=None,
                 ignored_scopes=None,
                 target_scopes=None,
                 reset: bool = False):
        super().__init__()
        self._set_nncf_wrapped_model(module)
        self._forward_signature = inspect.signature(module.forward)
        self.input_infos = input_infos

        self.ignored_scopes = ignored_scopes
        self.target_scopes = target_scopes
        self._user_dummy_forward_fn = dummy_forward_fn

        device = next(module.parameters()).device

        if wrap_inputs_fn is not None:
            self._wrap_inputs_fn = wrap_inputs_fn
        else:
            self.__input_infos_based_input_wrapper = InputInfoWrapManager(
                self.input_infos,
                self._forward_signature,
                module_ref_for_device=self)
            self._wrap_inputs_fn = self.__input_infos_based_input_wrapper.wrap_inputs

        self._nncf_module_scopes = []  # type: List[Scope]
        self.scopes_without_shape_matching = scopes_without_shape_matching
        self.debug_interface = CombinedDebugInterface() if is_debug() else None
        self._extra_module_types = []  # type: List[ExtraCompressionModuleType]
        # pylint:disable=line-too-long
        self._insertions_into_original_graph = {
        }  # type: Dict[InsertionPoint, List[Tuple[Callable, OperationPriority]]]

        _orig_graph_build_forward_fn = self._get_dummy_forward_fn_for_graph_building(
            with_input_tracing=True)
        self._graph_builder = GraphBuilder(_orig_graph_build_forward_fn)

        nncf_wrapped_model = self.get_nncf_wrapped_model()
        eval_only_ops_exec_ctx = self.collect_eval_only_ops_exec_context(
            nncf_wrapped_model, self._graph_builder)

        # all modules called in eval mode should be replaced prior to graph building
        self._replace_modules_by_nncf_modules(device, eval_only_ops_exec_ctx,
                                              reset)

        _orig_context = TracingContext()

        _orig_context.add_node_comparators([MODEL_INPUT_OP_NAME],
                                           ShapeIgnoringTensorMetaComparator())
        if self.scopes_without_shape_matching:
            _orig_context.add_node_comparators(
                scopes_without_shape_matching,
                ShapeIgnoringTensorMetaComparator())

        self._original_graph = self._graph_builder.build_graph(
            nncf_wrapped_model, _orig_context, as_eval=True)

        self._compressed_context = TracingContext()

        self._dummy_forward_fn = self._get_dummy_forward_fn_for_graph_building(
            with_input_tracing=False)

        self._compressed_context.add_node_comparators(
            [MODEL_INPUT_OP_NAME], ShapeIgnoringTensorMetaComparator())
        if self.scopes_without_shape_matching:
            self._compressed_context.add_node_comparators(
                scopes_without_shape_matching,
                ShapeIgnoringTensorMetaComparator())
        self._load_listener = None

        self._builders = []  # type: List['CompressionAlgorithmBuilder']
Example #16
    def __init__(self, model: NNCFNetwork,
                 quantization_controller: ExperimentalQuantizationController,
                 hw_precision_constraints: HardwareQuantizationConstraints,
                 eval_loader: torch.utils.data.DataLoader,
                 eval_fn: Callable[[nn.Module, torch.utils.data.DataLoader],
                                   float], hw_config_type: HWConfigType,
                 params: QuantizationEnvParams):

        logger.info("[Q.Env] Instantiating NNCF Quantization Environment")
        self.qctrl = quantization_controller
        self.qmodel = model
        self.eval_loader = eval_loader
        self.eval_fn = eval_fn
        self._hw_precision_constraints = hw_precision_constraints

        self.model_name = self.qmodel.nncf_module.__class__.__name__

        # Check and only proceed if target device is supported by Q.Env
        self.hw_cfg_type = hw_config_type
        assert self.hw_cfg_type in [None, HWConfigType.VPU]

        # Set target compression ratio
        self.compression_ratio = params.compression_ratio

        self.eval_loader = PartialDataLoader(
            self.eval_loader, iter_ratio=params.eval_subset_ratio)

        # Bool to disable hard resource constraint
        self.skip_constraint = params.skip_constraint

        # Bool to enable fine-tuning in each episode. Placeholder for now
        self.finetune = params.finetune

        # Configure search space for precision according to target device
        if self.hw_cfg_type is None:
            self.model_bitwidth_space = params.bits
        elif self.hw_cfg_type is HWConfigType.VPU:
            self.model_bitwidth_space = self._hw_precision_constraints.get_all_unique_bits(
            )
        self.model_bitwidth_space = sorted(list(self.model_bitwidth_space))

        # Create mapping of QuantizerId to the space of the corresponding quantizer's allowed qconfigs
        #pylint:disable=line-too-long
        self.qconfig_space_map = OrderedDict.fromkeys(
            self.qctrl.all_quantizations.keys(
            ))  # type: Dict[QuantizerId, List[QuantizerConfig]]
        if self.hw_cfg_type is None:
            for qid in self.qconfig_space_map.keys():
                conf = self.qctrl.all_quantizations[qid].get_current_config()
                conf_list_to_set = []
                for bit in self.model_bitwidth_space:
                    bit_adjusted_conf = deepcopy(conf)
                    bit_adjusted_conf.bits = bit
                    conf_list_to_set.append(bit_adjusted_conf)
                self.qconfig_space_map[qid] = conf_list_to_set
        else:
            for qid in self.qconfig_space_map:
                self.qconfig_space_map[
                    qid] = self._hw_precision_constraints.get(qid)

        # Quantizer Master Table Creation
        self._groups_of_adjacent_quantizers = self.qctrl._groups_of_adjacent_quantizers
        self.quantizer_table = self._create_quantizer_table()

        # Create master dataframe to keep track of quantizable layers and their attributes
        self.master_df, self.state_list = self._get_state_space(
            self.qctrl, self.qmodel, self.quantizer_table)
        if self.master_df.isnull().values.any():
            raise ValueError("Q.Env Master Dataframe has null value(s)")

        assert len(self.quantizer_table) == len(self.qctrl.all_quantizations), \
            "Number of Quantizer is not tally between quantizer table and quantization controller"

        # MinMaxScaler for State Embedding
        self.state_scaler = MinMaxScaler()
        self.state_scaler.fit(self.master_df[self.state_list])

        # Model Size Calculation
        self.model_size_calculator = ModelSizeCalculator(
            self.qmodel, self.qconfig_space_map)
        self.orig_model_size = self.model_size_calculator.fp_model_size
        self.min_model_size = self.model_size_calculator.min_model_size
        self.max_model_size = self.model_size_calculator.max_model_size
        self.target_model_size = self.orig_model_size * self.compression_ratio

        if self.target_model_size < self.min_model_size or self.target_model_size > self.max_model_size:
            raise ValueError(
                "Model Size Ratio {} is out of bounds ({}, {})".format(
                    self.compression_ratio,
                    self.min_model_size / self.orig_model_size,
                    self.max_model_size / self.orig_model_size))

        # Evaluate and store metric score of pretrained model
        self._evaluate_pretrained_model()
        self.qmodel_init_sd = deepcopy(self.qmodel.state_dict())

        self.reset()

        self._dump_autoq_data = params.dump_init_precision_data
        if self._dump_autoq_data or is_debug():
            dump_dir = params.log_dir
            if dump_dir is None:
                dump_dir = DEBUG_LOG_DIR
            self.dump_dir = Path(dump_dir) / Path("autoq_env_dump")
            self.dump_dir.mkdir(parents=True, exist_ok=True)
            # Serialize Q.Env information. Note that these functions should be at the end of Q.Env Initialization.
            self._dump_master_df()
            self._dump_quantized_graph()
            self._dump_groups_of_adjacent_quantizers()
Example #17
def forward(self, x):
    if is_debug():
        self.call_count += 1
    if self.init_stage:
        return x
    return self.quantize(x)