def forward(self, x):
    if is_debug():
        self.call_count += 1
    if self.init_stage:
        return x
    self.set_level_ranges()
    return self.quantize(x)
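# Every snippet in this section gates extra bookkeeping behind is_debug(). A minimal sketch
# of such a helper, assuming debug mode is keyed off the framework logger's verbosity
# (illustrative; not necessarily NNCF's actual nncf.debug implementation):
import logging

nncf_logger = logging.getLogger('nncf')

def is_debug():
    # Debug bookkeeping (call counters, node-call registration, graph dumps)
    # is active only when the framework logger is at DEBUG verbosity.
    return nncf_logger.getEffectiveLevel() == logging.DEBUG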
def wrapped(*args, **kwargs):
    ctx = get_current_context()
    if not ctx or getattr(ctx, 'in_operator', False) or not ctx.is_tracing:
        return operator(*args, **kwargs)
    ctx.in_operator = True
    if operator_info.custom_trace_fn is not None:
        result = operator_info.custom_trace_fn(operator, *args, **kwargs)
    else:
        ia_op_exec_context = ctx.get_caller_context(operator_info.name)
        ctx.register_operator_call(ia_op_exec_context.operator_name,
                                   ia_op_exec_context.scope_in_model)
        op_input = OperatorInput(list(args), kwargs)
        processed_input = ctx.execute_pre_hooks(ia_op_exec_context, op_input)
        args = tuple(processed_input.op_args)
        kwargs = processed_input.op_kwargs
        fargs = flatten_args(args, kwargs)
        node = ctx.find_operator_node(fargs, ia_op_exec_context)
        if is_debug():
            ctx.register_node_call(ctx.graph.get_node_key_by_id(node.node_id))
        result = operator(*args, **kwargs)
        result = trace_tensors(result, node)
        result = ctx.execute_post_hooks(ia_op_exec_context, result)
    ctx.in_operator = False
    return result
def __enter__(self):
    global _CURRENT_CONTEXT
    self._save_context = _CURRENT_CONTEXT
    _CURRENT_CONTEXT = self
    self._init_thread_local()
    if is_debug():
        self.reset_node_call_counters()
    return self
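# The matching __exit__ is not shown in this section; a plausible counterpart, assuming it
# only needs to restore the previously active context (a sketch, not the verbatim method):
def __exit__(self, exc_type, exc_val, exc_tb):
    global _CURRENT_CONTEXT
    # Unwind to whatever context was active before __enter__ saved it,
    # so nested tracing contexts restore correctly.
    _CURRENT_CONTEXT = self._save_context
    self._save_context = None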
def __init__(self, module, input_infos: List[ModelInputInfo] = None,
             dummy_forward_fn=None, scopes_without_shape_matching=None,
             ignored_scopes=None, target_scopes=None):
    super().__init__()
    self.set_nncf_wrapped_model(module)
    self.input_infos = input_infos
    self.ignored_scopes = ignored_scopes
    self.target_scopes = target_scopes
    self._dummy_forward_fn = dummy_forward_fn
    self._nncf_module_scopes = []  # type: List[Scope]
    self.scopes_without_shape_matching = scopes_without_shape_matching
    self.debug_interface = CombinedDebugInterface() if is_debug() else None
    self._extra_module_types = []  # type: List[CompressionModuleType]
    # pylint:disable=line-too-long
    self._insertions_into_original_graph = {}  # type: Dict[InsertionPoint, List[Tuple[Callable, OperationPriority]]]

    device = next(module.parameters()).device

    # all modules should be replaced prior to graph building
    self._replace_modules_by_nncf_modules(device)

    _orig_context = TracingContext()
    _orig_graph_build_forward_fn = self._get_dummy_forward_fn_for_graph_building(with_input_tracing=True)
    self._graph_builder = GraphBuilder(_orig_graph_build_forward_fn)

    _orig_context.add_node_comparators([MODEL_INPUT_OP_NAME], ShapeIgnoringTensorMetaComparator())
    if self.scopes_without_shape_matching:
        _orig_context.add_node_comparators(scopes_without_shape_matching,
                                           ShapeIgnoringTensorMetaComparator())

    self._original_graph = self._graph_builder.build_graph(self.get_nncf_wrapped_model(),
                                                           _orig_context)

    self._compressed_context = TracingContext()
    self._dummy_forward_fn = self._get_dummy_forward_fn_for_graph_building(with_input_tracing=False)

    self._compressed_context.add_node_comparators([MODEL_INPUT_OP_NAME],
                                                  ShapeIgnoringTensorMetaComparator())
    if self.scopes_without_shape_matching:
        self._compressed_context.add_node_comparators(scopes_without_shape_matching,
                                                      ShapeIgnoringTensorMetaComparator())
    self._load_listener = None

    self._builders = []  # type: List['CompressionAlgorithmBuilder']
@contextmanager
def context(name):
    ctx = get_context(name)
    ctx.enter()
    if is_debug():
        ctx.reset_node_call_counters()
    try:
        yield ctx
    finally:
        ctx.reset_scope_operator_call_counters()
        ctx.leave()
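# With the contextlib.contextmanager decorator restored above, the generator is used as a
# with-statement; `model` and `dummy_input` below are hypothetical:
with context("quantized_graphs") as ctx:
    output = model(dummy_input)  # operator calls inside the block are traced into ctx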
def forward(self, x):
    if is_debug():
        self.call_count += 1
    # TODO: refactor to get rid of extra if's and calls on each forward
    if self.init_stage:
        return x
    self.set_level_ranges()
    if is_tracing_state():
        return self.run_export_quantization(x)
    return self.quantize(x)
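# is_tracing_state above distinguishes the ONNX-export path from normal training. A hedged
# guess at what it checks (torch's JIT tracing flag; NNCF's actual helper may differ):
import torch

def is_tracing_state_sketch():
    # torch._C._get_tracing_state() is non-None while torch.jit.trace
    # or ONNX export is tracing the model.
    return torch._C._get_tracing_state() is not None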
def apply_init(self):
    original_device = next(self._model.parameters()).device
    self._model.to(self._init_device)
    traces_per_layer = self._calc_traces(self._criterion, self._iter_number, self._tolerance)
    if not traces_per_layer:
        raise RuntimeError('Failed to calculate hessian traces!')
    num_weights = len(self._ordered_weight_quantizations)
    bits_configurations = self.get_configs_constrained_by_order(self._bits, num_weights)
    ordered_weight_quantization_ids = list(self._ordered_weight_quantizations.keys())
    bits_configurations = self._filter_configs_by_precision_constraints(
        bits_configurations, self._hw_precision_constraints,
        ordered_weight_quantization_ids, traces_per_layer.get_order_of_traces())
    if not bits_configurations:
        raise RuntimeError('All bits configurations are incompatible with HW Config!')
    perturbations, weight_observers = self.calc_quantization_noise()
    configuration_metric = self.calc_hawq_metric_per_configuration(
        bits_configurations, perturbations, traces_per_layer, self._init_device)
    chosen_config_per_layer = self.choose_configuration(
        configuration_metric, bits_configurations, traces_per_layer.get_order_of_traces())
    self.set_chosen_config(chosen_config_per_layer)
    ordered_metric_per_layer = self.get_metric_per_layer(
        chosen_config_per_layer, perturbations, traces_per_layer)
    if is_debug():
        hawq_debugger = HAWQDebugger(bits_configurations, perturbations,
                                     weight_observers, traces_per_layer, self._bits)
        hawq_debugger.dump_metric(configuration_metric)
        hawq_debugger.dump_avg_traces()
        hawq_debugger.dump_density_of_quantization_noise()
        hawq_debugger.dump_perturbations_ratio()
        hawq_debugger.dump_bitwidth_graph(self._algo, self._model)
    self._model.rebuild_graph()
    str_bw = [str(element) for element in self.get_bitwidth_per_scope()]
    nncf_logger.info('\n'.join(['\n"bitwidth_per_scope": [', ',\n'.join(str_bw), ']']))
    self._model.to(original_device)
    return ordered_metric_per_layer
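# calc_hawq_metric_per_configuration above combines the average Hessian traces with the
# measured quantization noise. A minimal sketch of the HAWQ-style idea it is assumed to
# implement (trace-weighted perturbation, summed over layers; names here are illustrative,
# not NNCF's actual implementation):
def hawq_metric_sketch(bits_configuration, avg_traces, perturbations):
    # avg_traces[i]: average Hessian trace of layer i;
    # perturbations[i][b]: quantization noise of layer i at bitwidth b.
    return sum(avg_traces[i] * perturbations[i][bits]
               for i, bits in enumerate(bits_configuration))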
def apply_init(self):
    disabled_gradients = self.disable_quantizer_gradients(
        self._all_quantizers_per_scope,
        self._algo.quantized_weight_modules_registry,
        self._model)
    traces_per_layer = self._calc_traces(self._criterion, self._iter_number, self._tolerance)
    if not traces_per_layer:
        raise RuntimeError('Failed to calculate hessian traces!')
    self.enable_quantizer_gradients(self._model, self._all_quantizers_per_scope,
                                    disabled_gradients)
    num_weights = len(self._ordered_weight_quantizations)
    bits_configurations = self.get_configs_constrained_by_order(self._bits, num_weights)
    ordered_weight_quantization_ids = list(self._ordered_weight_quantizations.keys())
    bits_configurations = self.filter_configs_by_precision_constraints(
        bits_configurations, self._hw_precision_constraints,
        ordered_weight_quantization_ids, traces_per_layer.get_order_of_traces())
    if not bits_configurations:
        raise RuntimeError('All bits configurations are incompatible with HW Config!')
    perturbations, weight_observers = self.calc_quantization_noise()
    configuration_metric = self.calc_hawq_metric_per_configuration(
        bits_configurations, perturbations, traces_per_layer, self._device)
    chosen_config_per_layer = self.choose_configuration(
        configuration_metric, bits_configurations, traces_per_layer.get_order_of_traces())
    self.set_chosen_config(chosen_config_per_layer)
    ordered_metric_per_layer = self.get_metric_per_layer(
        chosen_config_per_layer, perturbations, traces_per_layer)
    if is_debug():
        self.HAWQDump(bits_configurations, configuration_metric, perturbations,
                      weight_observers, traces_per_layer, self._bits).run()
    self._model.rebuild_graph()
    str_bw = [str(element) for element in self.get_bitwidth_per_scope()]
    nncf_logger.info('\n'.join(['\n"bitwidth_per_scope": [', ',\n'.join(str_bw), ']']))
    return ordered_metric_per_layer
def wrapped(*args, **kwargs):
    ctx = get_current_context()
    if not ctx or getattr(ctx, 'in_operator', False) or not ctx.is_tracing:
        return operator(*args, **kwargs)
    ctx.in_operator = True
    if operator_info.custom_trace_fn is not None:
        try:
            result = operator_info.custom_trace_fn(operator, *args, **kwargs)
        except:
            # Looks like the __repr__ call made during IDE debug to display tensor contents
            # does not exit properly, but instead throws an exception. This try...except
            # block handles such a situation. Otherwise the context is stuck in the
            # "in_operator == True" state.
            ctx.in_operator = False
            raise
    else:
        ia_op_exec_context = ctx.get_caller_context(operator_info.name)
        ctx.register_operator_call(ia_op_exec_context.operator_name,
                                   ia_op_exec_context.scope_in_model)
        op_input = OperatorInput(list(args), kwargs)
        processed_input = ctx.execute_pre_hooks(ia_op_exec_context, op_input)
        args = tuple(processed_input.op_args)
        kwargs = processed_input.op_kwargs
        fargs = flatten_args(args, kwargs)
        node = ctx.find_operator_node(fargs, ia_op_exec_context)
        if is_debug():
            ctx.register_node_call(ctx.graph.get_node_key_by_id(node.node_id))
        result = operator(*args, **kwargs)
        result = trace_tensors(result, node)
        result = ctx.execute_post_hooks(ia_op_exec_context, result)
    ctx.in_operator = False
    return result
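# `wrapped` closes over `operator` and `operator_info`, i.e. it is the inner function of a
# patching decorator. A minimal sketch of the assumed surrounding logic (illustrative; the
# real NNCF patcher differs in detail):
def wrap_operator_sketch(operator, operator_info):
    def wrapped(*args, **kwargs):
        ...  # tracing body as in the snippet above
        return operator(*args, **kwargs)
    return wrapped

# Hypothetical patching of a torch functional:
# F.conv2d = wrap_operator_sketch(F.conv2d, operator_info_for('conv2d'))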
def run(self, criterion: _Loss, iter_number=200, tolerance=1e-5):
    disabled_gradients = self.disable_quantizer_gradients(
        self._all_quantizations,
        self._algo.quantized_weight_modules_registry,
        self._model)
    traces_per_layer = self._calc_traces(criterion, iter_number, tolerance)
    self.enable_quantizer_gradients(self._model, self._all_quantizations, disabled_gradients)
    num_weights = len(self._weights_to_init)
    bits_configurations = self.get_constrained_configs(self._bits, num_weights)
    perturbations, weight_observers = self.calc_quantization_noise()
    configuration_metric = self.calc_hawq_metric_per_configuration(
        bits_configurations, perturbations, traces_per_layer, self._device)
    chosen_config_per_layer = self.choose_configuration(
        configuration_metric, bits_configurations, traces_per_layer.get_order_of_traces())
    self.set_chosen_config(chosen_config_per_layer)
    ordered_metric_per_layer = self.get_metric_per_layer(
        chosen_config_per_layer, perturbations, traces_per_layer)
    if is_debug():
        self.HAWQDump(bits_configurations, configuration_metric, perturbations,
                      weight_observers, traces_per_layer, self._bits).run()
    return ordered_metric_per_layer
def apply_init(self) -> SingleConfigQuantizerSetup:
    from nncf.automl.environment.quantization_env import QuantizationEnv, QuantizationEnvParams
    from nncf.automl.agent.ddpg.ddpg import DDPG
    from nncf.debug import DEBUG_LOG_DIR

    if self._dump_autoq_data or is_debug():
        dump_dir = self._init_args.config.get('log_dir', None)
        if dump_dir is None:
            dump_dir = DEBUG_LOG_DIR
        self.dump_dir = Path(dump_dir) / Path("autoq_agent_dump")
        self.dump_dir.mkdir(parents=True, exist_ok=True)

        self.policy_dict = OrderedDict()  # key: episode
        self.best_policy_dict = OrderedDict()  # key: episode

        self._init_args.config['episodic_nncfcfg'] = self.dump_dir / "episodic_nncfcfg"
        os.makedirs(self._init_args.config['episodic_nncfcfg'], exist_ok=True)

        try:
            from torch.utils.tensorboard import SummaryWriter
            self.tb_writer = SummaryWriter(self.dump_dir)
            # log compression config to tensorboard
            self.tb_writer.add_text('AutoQ/run_config',
                                    json.dumps(self._init_args.config['compression'],
                                               indent=4, sort_keys=False).replace("\n", "\n\n"),
                                    0)
        except ModuleNotFoundError:
            # Keep the attribute defined so the `self.tb_writer is not None` check below
            # does not raise AttributeError when tensorboard is absent.
            self.tb_writer = None
            logger.warning("Tensorboard installation not found! Install the tensorboard "
                           "Python package for AutoQ tensorboard statistics to be dumped")

    start_ts = datetime.now()

    env_params = QuantizationEnvParams(
        compression_ratio=self._params.compression_ratio,
        eval_subset_ratio=self._params.eval_subset_ratio,
        skip_constraint=self._params.skip_constraint,
        finetune=self._params.finetune,
        bits=self._params.bits,
        dump_init_precision_data=self._dump_autoq_data,
        log_dir=Path(DEBUG_LOG_DIR) / Path("autoq"))

    # Instantiate Quantization Environment
    env = QuantizationEnv(
        self._model,
        self.quantization_controller,
        self._hw_precision_constraints,
        self._init_args.data_loader,
        self._init_args.eval_fn,
        hw_config_type=self._hw_cfg_type,
        params=env_params)

    nb_state = len(env.state_list)
    nb_action = 1

    # Instantiate Automation Agent
    agent = DDPG(nb_state, nb_action, self._iter_number,
                 hparam_override=self._ddpg_hparams_override)

    if self._dump_autoq_data and self.tb_writer is not None:
        self.tb_writer.add_text('AutoQ/state_embedding',
                                env.master_df[env.state_list].to_markdown())

    best_policy, best_reward = self._search(agent, env)

    end_ts = datetime.now()

    final_qid_vs_qconfig_map = env.select_config_for_actions(best_policy)
    final_quantizer_setup = self.quantization_controller.get_quantizer_setup_for_current_state()
    for qp_id, qconf in final_qid_vs_qconfig_map.items():
        final_quantizer_setup.quantization_points[qp_id].qconfig = qconf

    logger.info('[AutoQ] best_reward: {}'.format(best_reward))
    logger.info('[AutoQ] best_policy: {}'.format(best_policy))
    logger.info("[AutoQ] Search Complete")
    logger.info("[AutoQ] Elapsed time of AutoQ Precision Initialization: {}".format(end_ts - start_ts))
    return final_quantizer_setup
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            dump_graphs=True) \
        -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning
    from an original PyTorch model and a configuration object.

    :param model: The original model. Should have its parameters already loaded
        from a checkpoint or another source.
    :param config: A configuration object used to determine the exact compression
        modifications to be applied to the model.
    :param resuming_state_dict: A PyTorch state dict object to load (strictly)
        into the compressed model after building.
    :param dummy_forward_fn: Will be used instead of a *forward* function call to
        build the internal graph representation via tracing. Specifying this is
        useful when the original training pipeline has special formats of data
        loader output or has additional *forward* arguments other than input
        tensors. Otherwise, the *forward* call of the model during graph tracing
        will be made with mock tensors according to the shape specified in the
        config object.
    :param dump_graphs: Whether to dump the internal graph representation of the
        original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which
        case the controller is an instance of CompositeCompressionController) and
        the model ready for compression parameter training wrapped as an object
        of NNCFNetwork.
    """
    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(input_info_list,
                                                          with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.dump_graph(osp.join(config.get("log_dir", "."), "original_graph.dot"),
                             extended=True)

    if is_debug():
        set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching', [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(model, input_infos=input_info_list,
                                   dummy_forward_fn=dummy_forward_fn,
                                   ignored_scopes=ignored_scopes,
                                   target_scopes=target_scopes,
                                   scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(config,
                                                                          should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    if dump_graphs and is_main_process() and compression_algo_builder_list:
        if dummy_forward_fn is None:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(input_info_list,
                                                          with_input_tracing=False))
        else:
            compressed_graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        graph = compressed_graph_builder.build_graph(compressed_model,
                                                     compressed_model.get_tracing_context())
        graph.dump_graph(osp.join(config.get("log_dir", "."), "compressed_graph.dot"),
                         extended=True)

    if resuming_state_dict is not None:
        load_state(compressed_model, resuming_state_dict, is_resume=True)

    return compression_ctrl, compressed_model
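# Typical usage of create_compressed_model, assuming a quantization entry in the config
# (a hedged sketch; the exact config schema and construction API are defined by NNCF):
import torchvision
from nncf import NNCFConfig

nncf_config = NNCFConfig({
    "input_info": {"sample_size": [1, 3, 224, 224]},
    "compression": {"algorithm": "quantization"},
})
model = torchvision.models.resnet18(pretrained=True)
compression_ctrl, compressed_model = create_compressed_model(model, nncf_config)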
def __init__(self, module, quantize_module_creator_fn, input_infos=None,
             dummy_forward_fn=None, ignored_scopes=None, target_scopes=None,
             quantize_inputs=True, quantize_outputs=False,
             quantizable_subgraph_patterns=None, scopes_without_shape_matching=None,
             disable_function_quantization_hooks=False):
    super().__init__()
    self.set_nncf_wrapped_module(module)
    self.quantize_inputs = quantize_inputs
    self.quantize_outputs = quantize_outputs
    self.input_infos = input_infos
    self.ignored_scopes = ignored_scopes
    self.target_scopes = target_scopes
    self.activation_quantizers = nn.ModuleDict()
    self.function_quantizers = nn.ModuleDict()
    self.quantized_weight_modules = OrderedDict()
    self.quantized_activation_modules = OrderedDict()
    self.quantize_module_creator_fn = quantize_module_creator_fn
    self.quantizable_subgraph_patterns = quantizable_subgraph_patterns
    self._dummy_forward_fn = dummy_forward_fn
    self._nncf_module_scopes = []  # type: List[Scope]
    self.debug_interface = QuantizationDebugInterface() if is_debug() else None
    self.scopes_without_shape_matching = scopes_without_shape_matching

    device = next(module.parameters()).device

    self.all_quantizations = OrderedDict()
    self._processed_input_agnostic_op_exec_contexts = set()
    self._processed_function_quantizers = set()

    # all modules should be replaced prior to graph building
    self._replace_quantized_modules_by_nncf_modules(device)
    self._register_weight_quantization_operations(device)

    if self._dummy_forward_fn is None:
        self._dummy_forward_fn = create_dummy_forward_fn(self.input_infos)
    self._graph_builder = GraphBuilder(custom_forward_fn=self._dummy_forward_fn)

    self._context_name = "orig"
    if self.scopes_without_shape_matching:
        get_context(self._context_name).add_node_comparators(scopes_without_shape_matching,
                                                             ShapeIgnoringTensorMetaComparator())
    self._original_graph = self._graph_builder.build_graph(self, self._context_name)

    self._context_name = "quantized_graphs"
    self._ctx = get_context("quantized_graphs")
    if self.scopes_without_shape_matching:
        get_context(self._context_name).add_node_comparators(scopes_without_shape_matching,
                                                             ShapeIgnoringTensorMetaComparator())

    self._register_activation_quantization_hooks(device)
    if self.quantize_inputs:
        self._register_input_quantization_operations(device)

    if not disable_function_quantization_hooks:
        self._register_function_quantization_hooks(device)

    quantization_types = [class_type.__name__
                          for class_type in QUANTIZATION_MODULES.registry_dict.values()]
    self.all_quantizations = get_state_dict_names_with_modules(self, quantization_types)
    self.load_listener = LoadStateListener(self, self.all_quantizations)
    if self.debug_interface is not None:
        self.debug_interface.init_actual(self.all_quantizations.keys(),
                                         self.activation_quantizers.keys(),
                                         self.function_quantizers.keys())
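# get_state_dict_names_with_modules above is assumed to map dotted, state-dict-style names
# to every submodule whose class name is one of the registered quantization types; a hedged
# sketch of that traversal (not the verbatim NNCF helper):
from collections import OrderedDict

def get_state_dict_names_with_modules_sketch(model, class_names, prefix=''):
    found = OrderedDict()
    for name, module in model.named_children():
        full_name = prefix + name
        if module.__class__.__name__ in class_names:
            found[full_name] = module
        # Recurse so nested quantizers (e.g. inside ModuleDicts) are collected too.
        found.update(get_state_dict_names_with_modules_sketch(module, class_names,
                                                              full_name + '.'))
    return found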
def apply_init(self):
    if not self._quantizers_handler.get_weight_quantizers_in_execution_order_per_id():
        return None
    original_device = next(self._model.parameters()).device
    self._model.to(self._init_device)
    traces_per_layer = self._calc_traces(self._criterion_fn, self._criterion,
                                         self._iter_number, self._tolerance)
    if not traces_per_layer:
        raise RuntimeError('Failed to calculate hessian traces!')
    traces_order = traces_per_layer.traces_order
    num_weights = len(self._weight_quantizations_by_execution_order)
    bits_configurations = self.get_configs_constrained_by_traces_order(self._bits, num_weights)
    weight_quantizer_ids_in_execution_order = list(self._weight_quantizations_by_execution_order.keys())

    if self._bitwidth_assignment_mode == BitwidthAssignmentMode.STRICT:
        self._merge_constraints_for_adjacent_quantizers(self._groups_of_adjacent_quantizers,
                                                        self._hw_precision_constraints)

    bits_configurations = self._filter_configs_by_precision_constraints(
        bits_configurations, self._hw_precision_constraints,
        weight_quantizer_ids_in_execution_order, traces_order)
    if not bits_configurations:
        warnings.warn('All bits configurations are incompatible with HW Config!', RuntimeWarning)
        return None

    if self._bitwidth_assignment_mode == BitwidthAssignmentMode.STRICT:
        bits_configurations = self._filter_configs_by_grouped_weight_quantizers(
            bits_configurations, weight_quantizer_ids_in_execution_order,
            self._groups_of_adjacent_quantizers, traces_order)
    if not bits_configurations:
        warnings.warn('No bits configurations are left after removing inconsistent groups of weight quantizers'
                      ' with adjacent activation quantizers!', RuntimeWarning)
        return None

    flops_bits_per_config = self.get_flops_bits_per_config(bits_configurations, traces_order)
    min_ratio = min(flops_bits_per_config)
    max_ratio = max(flops_bits_per_config)
    if not min_ratio <= self._compression_ratio <= max_ratio:
        raise AttributeError('Invalid compression ratio={}. '
                             'Should be within range [{:.3f}, {:.3f}]'.format(
                                 self._compression_ratio, min_ratio, max_ratio))

    perturbations, weight_observers = self.calc_quantization_noise()

    configuration_metric = self.calc_hawq_metric_per_configuration(
        bits_configurations, perturbations, traces_per_layer, self._init_device)

    config_index = self.choose_configuration(configuration_metric, flops_bits_per_config)
    chosen_config_in_traces_order = bits_configurations[config_index]
    chosen_config_in_execution_order = traces_order.get_execution_order_config(
        chosen_config_in_traces_order)
    nncf_logger.info('Chosen HAWQ configuration with ratio={:.2f}, '
                     'bitwidth per weightable layer={}'.format(
                         flops_bits_per_config[config_index], chosen_config_in_execution_order))
    nncf_logger.debug('Order of the weightable layers in the HAWQ configuration (in descending '
                      'order of average Hessian traces) ={}'.format(traces_order))

    self.set_chosen_config(chosen_config_in_execution_order)
    self._model.rebuild_graph()
    if is_debug() or self._dump_hawq_data:
        hawq_debugger = HAWQDebugger(bits_configurations, perturbations,
                                     weight_observers, traces_per_layer, self._bits)
        hawq_debugger.dump_metric_MB(configuration_metric)
        hawq_debugger.dump_metric_flops(configuration_metric, flops_bits_per_config, config_index)
        hawq_debugger.dump_avg_traces()
        hawq_debugger.dump_density_of_quantization_noise()
        hawq_debugger.dump_perturbations_ratio()
        hawq_debugger.dump_bitwidth_graph(self._algo, self._model,
                                          self._groups_of_adjacent_quantizers)
    str_bw = [str(element) for element in self.get_bitwidth_per_scope()]
    nncf_logger.info('\n'.join(['\n"bitwidth_per_scope": [', ',\n'.join(str_bw), ']']))
    self._model.to(original_device)
    ordered_metric_per_layer = self.get_metric_per_layer(chosen_config_in_execution_order,
                                                         perturbations, traces_per_layer)
    return ordered_metric_per_layer
def __init__(self, module, input_infos: List[ModelInputInfo],
             dummy_forward_fn=None, wrap_inputs_fn=None,
             scopes_without_shape_matching=None, ignored_scopes=None,
             target_scopes=None, reset: bool = False):
    super().__init__()
    self._set_nncf_wrapped_model(module)
    self._forward_signature = inspect.signature(module.forward)
    self.input_infos = input_infos
    self.ignored_scopes = ignored_scopes
    self.target_scopes = target_scopes
    self._user_dummy_forward_fn = dummy_forward_fn

    device = next(module.parameters()).device

    if wrap_inputs_fn is not None:
        self._wrap_inputs_fn = wrap_inputs_fn
    else:
        self.__input_infos_based_input_wrapper = InputInfoWrapManager(
            self.input_infos, self._forward_signature, module_ref_for_device=self)
        self._wrap_inputs_fn = self.__input_infos_based_input_wrapper.wrap_inputs

    self._nncf_module_scopes = []  # type: List[Scope]
    self.scopes_without_shape_matching = scopes_without_shape_matching
    self.debug_interface = CombinedDebugInterface() if is_debug() else None
    self._extra_module_types = []  # type: List[ExtraCompressionModuleType]
    # pylint:disable=line-too-long
    self._insertions_into_original_graph = {}  # type: Dict[InsertionPoint, List[Tuple[Callable, OperationPriority]]]

    _orig_graph_build_forward_fn = self._get_dummy_forward_fn_for_graph_building(with_input_tracing=True)
    self._graph_builder = GraphBuilder(_orig_graph_build_forward_fn)

    nncf_wrapped_model = self.get_nncf_wrapped_model()
    eval_only_ops_exec_ctx = self.collect_eval_only_ops_exec_context(nncf_wrapped_model,
                                                                     self._graph_builder)

    # all modules called in eval mode should be replaced prior to graph building
    self._replace_modules_by_nncf_modules(device, eval_only_ops_exec_ctx, reset)

    _orig_context = TracingContext()
    _orig_context.add_node_comparators([MODEL_INPUT_OP_NAME], ShapeIgnoringTensorMetaComparator())
    if self.scopes_without_shape_matching:
        _orig_context.add_node_comparators(scopes_without_shape_matching,
                                           ShapeIgnoringTensorMetaComparator())

    self._original_graph = self._graph_builder.build_graph(nncf_wrapped_model, _orig_context,
                                                           as_eval=True)

    self._compressed_context = TracingContext()
    self._dummy_forward_fn = self._get_dummy_forward_fn_for_graph_building(with_input_tracing=False)

    self._compressed_context.add_node_comparators([MODEL_INPUT_OP_NAME],
                                                  ShapeIgnoringTensorMetaComparator())
    if self.scopes_without_shape_matching:
        self._compressed_context.add_node_comparators(scopes_without_shape_matching,
                                                      ShapeIgnoringTensorMetaComparator())
    self._load_listener = None

    self._builders = []  # type: List['CompressionAlgorithmBuilder']
def __init__(self, model: NNCFNetwork,
             quantization_controller: ExperimentalQuantizationController,
             hw_precision_constraints: HardwareQuantizationConstraints,
             eval_loader: torch.utils.data.DataLoader,
             eval_fn: Callable[[nn.Module, torch.utils.data.DataLoader], float],
             hw_config_type: HWConfigType,
             params: QuantizationEnvParams):

    logger.info("[Q.Env] Instantiating NNCF Quantization Environment")
    self.qctrl = quantization_controller
    self.qmodel = model
    self.eval_loader = eval_loader
    self.eval_fn = eval_fn
    self._hw_precision_constraints = hw_precision_constraints

    self.model_name = self.qmodel.nncf_module.__class__.__name__

    # Check and only proceed if target device is supported by Q.Env
    self.hw_cfg_type = hw_config_type
    assert self.hw_cfg_type in [None, HWConfigType.VPU]

    # Set target compression ratio
    self.compression_ratio = params.compression_ratio

    self.eval_loader = PartialDataLoader(self.eval_loader,
                                         iter_ratio=params.eval_subset_ratio)

    # Bool to disable hard resource constraint
    self.skip_constraint = params.skip_constraint

    # Bool to enable fine-tuning in each episode. Placeholder for now
    self.finetune = params.finetune

    # Configure search space for precision according to target device
    if self.hw_cfg_type is None:
        self.model_bitwidth_space = params.bits
    elif self.hw_cfg_type is HWConfigType.VPU:
        self.model_bitwidth_space = self._hw_precision_constraints.get_all_unique_bits()
    self.model_bitwidth_space = sorted(list(self.model_bitwidth_space))

    # Create mapping of QuantizerId to the space of the corresponding quantizer's allowed qconfigs
    # pylint:disable=line-too-long
    self.qconfig_space_map = OrderedDict.fromkeys(
        self.qctrl.all_quantizations.keys())  # type: Dict[QuantizerId, List[QuantizerConfig]]
    if self.hw_cfg_type is None:
        for qid in self.qconfig_space_map.keys():
            conf = self.qctrl.all_quantizations[qid].get_current_config()
            conf_list_to_set = []
            for bit in self.model_bitwidth_space:
                bit_adjusted_conf = deepcopy(conf)
                bit_adjusted_conf.bits = bit
                conf_list_to_set.append(bit_adjusted_conf)
            self.qconfig_space_map[qid] = conf_list_to_set
    else:
        for qid in self.qconfig_space_map:
            self.qconfig_space_map[qid] = self._hw_precision_constraints.get(qid)

    # Quantizer Master Table Creation
    self._groups_of_adjacent_quantizers = self.qctrl._groups_of_adjacent_quantizers
    self.quantizer_table = self._create_quantizer_table()

    # Create master dataframe to keep track of quantizable layers and their attributes
    self.master_df, self.state_list = self._get_state_space(self.qctrl, self.qmodel,
                                                            self.quantizer_table)
    if self.master_df.isnull().values.any():
        raise ValueError("Q.Env Master Dataframe has null value(s)")

    assert len(self.quantizer_table) == len(self.qctrl.all_quantizations), \
        "Number of quantizers in the quantizer table and in the quantization controller do not tally"

    # MinMaxScaler for State Embedding
    self.state_scaler = MinMaxScaler()
    self.state_scaler.fit(self.master_df[self.state_list])

    # Model Size Calculation
    self.model_size_calculator = ModelSizeCalculator(self.qmodel, self.qconfig_space_map)
    self.orig_model_size = self.model_size_calculator.fp_model_size
    self.min_model_size = self.model_size_calculator.min_model_size
    self.max_model_size = self.model_size_calculator.max_model_size

    self.target_model_size = self.orig_model_size * self.compression_ratio

    if self.target_model_size < self.min_model_size or self.target_model_size > self.max_model_size:
        raise ValueError("Model Size Ratio {} is out of bounds ({}, {})".format(
            self.compression_ratio,
            self.min_model_size / self.orig_model_size,
            self.max_model_size / self.orig_model_size))

    # Evaluate and store metric score of pretrained model
    self._evaluate_pretrained_model()
    self.qmodel_init_sd = deepcopy(self.qmodel.state_dict())

    self.reset()

    self._dump_autoq_data = params.dump_init_precision_data
    if self._dump_autoq_data or is_debug():
        dump_dir = params.log_dir
        if dump_dir is None:
            dump_dir = DEBUG_LOG_DIR
        self.dump_dir = Path(dump_dir) / Path("autoq_env_dump")
        self.dump_dir.mkdir(parents=True, exist_ok=True)
        # Serialize Q.Env information. Note that these functions should be at the end of Q.Env Initialization.
        self._dump_master_df()
        self._dump_quantized_graph()
        self._dump_groups_of_adjacent_quantizers()
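# PartialDataLoader above restricts evaluation to a subset of the loader. A minimal sketch,
# assuming iter_ratio bounds the fraction of batches yielded (illustrative, not the exact
# NNCF class):
class PartialDataLoaderSketch:
    def __init__(self, data_loader, iter_ratio=1.0):
        self._data_loader = data_loader
        # Yield at least one batch, and at most int(iter_ratio * len) batches.
        self._num_batches = max(1, int(len(data_loader) * iter_ratio))

    def __iter__(self):
        for i, batch in enumerate(self._data_loader):
            if i >= self._num_batches:
                break
            yield batch

    def __len__(self):
        return self._num_batches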
def forward(self, x):
    if is_debug():
        self.call_count += 1
    if self.init_stage:
        return x
    return self.quantize(x)