def check_correct_nncf_modules_replacement(model: NNCFNetwork, compressed_model: NNCFNetwork) \
        -> Tuple[Dict[Scope, Module], Dict[Scope, Module]]:
    """
    Check that all modules listed in NNCF_MODULES_MAP were replaced by their
    NNCF-wrapped counterparts in the compressed model.

    :param model: original model
    :param compressed_model: compressed model
    :return: dict of all replaceable modules in the original model and dict of all
        NNCF modules from the compressed model, both keyed by Scope
    """
    nncf_modules_reversed_map = {value: key for key, value in NNCF_MODULES_MAP.items()}
    original_modules = get_all_modules_by_type(model, list(NNCF_MODULES_MAP.values()))
    nncf_modules = get_all_modules_by_type(compressed_model.get_nncf_wrapped_model(),
                                           list(NNCF_MODULES_MAP.keys()))
    assert len(original_modules) == len(nncf_modules)
    for scope in original_modules:
        # Rebuild the original module's scope with the wrapped (NNCF*) class name
        # so it can be looked up among the compressed model's scopes.
        sparse_scope = deepcopy(scope)
        elt = sparse_scope.pop()  # type: ScopeElement
        elt.calling_module_class_name = nncf_modules_reversed_map[elt.calling_module_class_name]
        sparse_scope.push(elt)
        assert sparse_scope in nncf_modules
    return original_modules, nncf_modules
def test_can_load_quant_algo__with_defaults():
    """Default quantization config: every Conv2d becomes an NNCFConv2d carrying a weight quantizer."""
    model = BasicConvTestModel()
    config = get_quantization_config_without_range_init()
    register_bn_adaptation_init_args(config)

    builder = create_compression_algorithm_builder(config)
    assert isinstance(builder, QuantizationBuilder)

    quant_model, _ = create_compressed_model_and_algo_for_test(deepcopy(model), config)

    original_convs = get_all_modules_by_type(model, 'Conv2d')
    wrapped_convs = get_all_modules_by_type(
        quant_model.get_nncf_wrapped_model(), 'NNCFConv2d')
    assert len(original_convs) == len(wrapped_convs)

    for original_scope in original_convs:
        expected_scope = deepcopy(original_scope)  # type: Scope
        expected_scope.pop()
        expected_scope.push(ScopeElement('NNCFConv2d', 'conv'))
        assert expected_scope in wrapped_convs.keys()

        seen_op_types = []
        for pre_op in wrapped_convs[expected_scope].pre_ops.values():
            is_update_op = isinstance(pre_op, (UpdateInputs, UpdateWeight))
            if is_update_op and isinstance(pre_op.operand, SymmetricQuantizer):
                op_type_name = pre_op.__class__.__name__
                # Each update-op kind must appear at most once per conv.
                assert op_type_name not in seen_op_types
                seen_op_types.append(op_type_name)
        assert UpdateWeight.__name__ in seen_op_types
def get_all_quantizers_per_full_scope(model):
    """Collect every quantizer module in the model, keyed by full scope, sorted by scope string."""
    found = OrderedDict()
    external_storage = model.get_compression_modules_by_type(
        ExtraCompressionModuleType.EXTERNAL_QUANTIZER)
    for quantizer_cls in QUANTIZATION_MODULES.registry_dict.values():
        type_name = quantizer_cls.__name__
        # Quantizers may live either in the external-quantizer storage
        # or inside the wrapped model itself.
        found.update(get_all_modules_by_type(external_storage, type_name))
        found.update(get_all_modules_by_type(model.get_nncf_wrapped_model(), type_name))
    return OrderedDict(sorted(found.items(), key=lambda item: str(item[0])))
def test_can_quantize_inputs_for_sparsity_plus_quantization():
    """Sparsity+quantization composite: weight gets sparsifier then quantizer; input gets one quantizer."""
    model = BasicConvTestModel()
    config = get_basic_sparsity_plus_quantization_config()
    register_bn_adaptation_init_args(config)
    sparse_quantized_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    assert isinstance(compression_ctrl, CompositeCompressionAlgorithmController)

    conv_modules = get_all_modules_by_type(sparse_quantized_model, 'NNCFConv2d')
    nncf_module = next(iter(conv_modules.values()))
    # 1x weight sparsifier + 1x weight quantizer
    assert len(nncf_module.pre_ops) == 2
    first_op = nncf_module.pre_ops['0']
    second_op = nncf_module.pre_ops['1']
    assert isinstance(first_op, UpdateWeight)
    assert isinstance(first_op.op, RBSparsifyingWeight)
    assert isinstance(second_op, UpdateWeight)
    assert isinstance(second_op.op, SymmetricQuantizer)

    input_quantizer = get_all_modules(sparse_quantized_model)[
        f'NNCFNetwork/ModuleDict[{EXTERNAL_QUANTIZERS_STORAGE_NAME}]']
    assert len(input_quantizer) == 1
    assert isinstance(next(iter(input_quantizer.values())), SymmetricQuantizer)
def __init__(
        self,
        algo: 'ExperimentalQuantizationController',
        params: BasePrecisionInitParams,
        hw_precision_constraints: HardwareQuantizationConstraints = None):
    """
    Capture the controller state needed for precision (bitwidth) initialization.

    :param algo: the quantization controller that owns the quantizers to initialize
    :param params: precision init parameters (not stored here; presumably consumed
        by subclasses -- TODO confirm)
    :param hw_precision_constraints: optional per-quantizer bitwidth constraints
        derived from the hardware config
    """
    self._algo = algo
    self._model = self._algo._model  # type: NNCFNetwork
    all_quantizers = algo.all_quantizations
    self._hw_precision_constraints = hw_precision_constraints
    # Snapshot bitwidths before init so they can be compared/restored afterwards.
    self.original_precisions = {
        q_id: quantizer.num_bits
        for q_id, quantizer in all_quantizers.items()
    }
    self._quantizers_handler = WeightQuantizersHandler(
        self._model, self._algo.weight_quantizers,
        self._hw_precision_constraints)

    # Class names of every registered quantizer module type.
    quantization_types = [
        class_type.__name__
        for class_type in QUANTIZATION_MODULES.registry_dict.values()
    ]
    self._weight_quantizations_by_execution_order = \
        self._quantizers_handler.get_weight_quantizers_in_execution_order_per_id()

    # Activation quantizers live in the external quantizer storage;
    # weight quantizers are merged in afterwards, keyed by scope.
    self._all_quantizers_per_scope = get_all_modules_by_type(
        self._model.get_compression_modules_by_type(
            ExtraCompressionModuleType.EXTERNAL_QUANTIZER), quantization_types)
    self._all_quantizers_per_scope.update(
        self._quantizers_handler.
        get_all_weight_quantizers_in_execution_order_per_scope())
def create_frozen_model(self):
    """Freeze first conv by default; freeze all convs if _freeze_all is True."""
    model = TwoConvTestModel()
    # -1 means "freeze every conv".
    freeze_limit = -1 if self._freeze_all else 1
    for idx, conv in enumerate(get_all_modules_by_type(model, 'Conv2d').values()):
        if freeze_limit == -1 or idx < freeze_limit:
            conv.weight.requires_grad = False
    return model
def save_params(model, out_file_path):
    """Dump (scale, signed_tensor) CPU copies of every SymmetricQuantizer in the model to a file."""
    cpu = torch.device('cpu')
    quantizers = utils.get_all_modules_by_type(model, 'SymmetricQuantizer')
    gpu_scale_signed_params = [
        (quantizer.scale.to(cpu), quantizer.signed_tensor.to(cpu))
        for quantizer in quantizers.values()
    ]
    with out_file_path.open('wb') as out_file:
        torch.save(gpu_scale_signed_params, out_file)
def get_modules_in_nncf_modules_by_type(self,
                                        types) -> Dict[Scope, nn.Module]:
    """
    Collect submodules of the given types that live inside NNCF-wrapped modules.

    :param types: module class name(s) to look for inside each NNCF module
    :return: dict mapping full scope (NNCF module scope + relative scope) to module
    """
    nncf_modules = self.get_nncf_modules()
    retval = {}
    for nncf_module_scope, nncf_module in nncf_modules.items():
        # NOTE(review): pop() mutates the Scope in place, i.e. it also alters the key
        # object of the local `nncf_modules` dict -- appears safe since that dict is
        # rebuilt per call, but confirm Scope hashing/equality is unaffected.
        nncf_module_scope.pop()
        for relative_scope, target_module in get_all_modules_by_type(
                nncf_module, types).items():
            retval[nncf_module_scope + relative_scope] = target_module
    return retval
def check_sign_and_scale(model, ref_table):
    """Verify sign flag and scale of every SymmetricQuantizer whose scope matches a ref_table pattern."""
    quantizers = get_all_modules_by_type(model, 'SymmetricQuantizer')
    for scope, quantizer in quantizers.items():
        scope_str = str(scope)
        for pattern, ref_values in ref_table.items():
            if re.search(pattern, scope_str) is None:
                continue
            assert isinstance(quantizer, SymmetricQuantizer)
            assert quantizer.signed == ref_values[0], \
                'sign is not matched for {}'.format(scope_str)
            assert all(quantizer.scale == ref_values[1]), \
                'scale is not matched for {}'.format(scope_str)
def test_get_all_modules_by_type__for_multiple_type():
    """Searching several types at once returns all matches, in traversal order, as an OrderedDict."""
    model = ModelForNameTest()
    found = get_all_modules_by_type(model, ['ReLU', 'AvgPool2d'])
    found = OrderedDict((str(scope), module) for scope, module in found.items())
    expected_scopes = [
        'ModelForNameTest/AvgPool2d[avgpool]',
        'ModelForNameTest/Sequential[layer1]/ReLU[relu01]',
        'ModelForNameTest/Sequential[layer2]/Sequential[layer1]/ReLU[relu01]',
        'ModelForNameTest/Sequential[layer2]/ReLU[relu02]'
    ]
    assert list(found) == expected_scopes
    assert isinstance(found, OrderedDict)
def test_scope_overrides(self, wrap_dataloader):
    """Per-scope config overrides must force the chosen mode/bits/signedness on matching quantizers."""
    config = create_config()
    config['target_device'] = 'TRIAL'
    # Override all conv weight quantizers to 7-bit asymmetric,
    # and their activation quantizers to 7-bit unsigned.
    config["compression"]["scope_overrides"] = {
        "weights": {
            r"{re}NNCFConv2d\[[0-9]*\]/conv2d_0": {
                "bits": 7,
                "mode": "asymmetric",
            },
        },
        "activations": {
            r"{re}NNCFConv2d\[[0-9]*\]/conv2d_0": {
                "bits": 7,
                "signed": False,
            }
        }
    }
    data_loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    _, compressed_model = self.create_algo_and_compressed_model(config)

    quantizers = get_all_modules_by_type(
        compressed_model, ['SymmetricQuantizer', 'AsymmetricQuantizer'])
    quantizer_str_dict = {str(k): v for k, v in quantizers.items()}
    # Weight quantizers -- expected to have become asymmetric via the override.
    group_1 = [
        quantizer_str_dict[
            "NNCFNetwork/TwoConvTestModel[nncf_module]/Sequential[features]/"
            "Sequential[0]/NNCFConv2d[0]/ModuleDict[pre_ops]/UpdateWeight[0]/"
            "AsymmetricQuantizer[op]"],
        quantizer_str_dict[
            "NNCFNetwork/TwoConvTestModel[nncf_module]/Sequential[features]/"
            "Sequential[1]/NNCFConv2d[0]/ModuleDict[pre_ops]/UpdateWeight[0]/"
            "AsymmetricQuantizer[op]"]
    ]
    # Activation quantizers -- stay symmetric but must be unsigned.
    group_2 = [
        quantizer_str_dict[
            f"NNCFNetwork/ModuleDict[{EXTERNAL_QUANTIZERS_STORAGE_NAME}]/"
            "SymmetricQuantizer[TwoConvTestModel/Sequential[features]"
            "/Sequential[0]/NNCFConv2d[0]/conv2d_0|OUTPUT]"],
        quantizer_str_dict[
            f"NNCFNetwork/ModuleDict[{EXTERNAL_QUANTIZERS_STORAGE_NAME}]/SymmetricQuantizer"
            "[/nncf_model_input_0|OUTPUT]"],
    ]
    for quantizer in group_1:
        assert isinstance(quantizer, AsymmetricQuantizer)
        assert quantizer.levels == 2**7
    for quantizer in group_2:
        assert isinstance(quantizer, SymmetricQuantizer)
        assert not quantizer.signed
def test_hawq_on_single_conv_without_quantizers(_seed, dataset_dir, tmp_path,
                                                params: HAWQTestParams, mocker):
    """Hessian-trace estimation on a single trainable conv must reproduce the reference trace value."""
    config = get_squeezenet_quantization_config(batch_size=params.batch_size)
    iter_number = params.iter_number
    tolerance = 4e-4

    model = squeezenet1_1(num_classes=10, dropout=0)
    from torchvision.models.squeezenet import model_urls
    load_state(model, model_zoo.load_url(model_urls['squeezenet1_1']))
    criterion = nn.CrossEntropyLoss()

    # Reference values and tolerance differ between CPU and CUDA runs.
    ref_trace = params.cpu_ref_trace
    rtol = 1e-5
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        ref_trace = params.cuda_ref_trace
        rtol = 1e-6

    if not dataset_dir:
        dataset_dir = str(tmp_path)
    data_loader, _ = create_test_dataloaders(config, dataset_dir)
    device = next(model.parameters()).device

    # Only the first conv's weight participates in the trace estimation.
    for _, param in model.named_parameters():
        param.requires_grad = False
    first_conv = next(iter(get_all_modules_by_type(model, 'Conv2d').values()))
    first_conv.weight.requires_grad = True

    # Patch both sampling methods with a numpy-based generator for
    # reproducibility (presumably made deterministic via the _seed fixture --
    # TODO confirm).
    ph_import = 'nncf.torch.quantization.hessian_trace.ParameterHandler'
    sample_rademacher_patch = mocker.patch(
        f'{ph_import}.sample_rademacher_like_params', autospec=True)
    sample_normal_patch = mocker.patch(
        f'{ph_import}.sample_normal_like_params', autospec=True)

    def mock_sampling_fn(self):
        # pylint:disable=protected-access
        return list(
            map(
                lambda x: torch.from_numpy(random_sample(x.shape)).to(
                    device=self._device), self.parameters))

    sample_rademacher_patch.side_effect = mock_sampling_fn
    sample_normal_patch.side_effect = mock_sampling_fn

    trace_estimator = HessianTraceEstimator(model, default_criterion_fn,
                                            criterion, device, data_loader,
                                            params.num_data_points)
    actual_state = trace_estimator.get_average_traces(max_iter=iter_number,
                                                      tolerance=tolerance)
    assert math.isclose(actual_state.item(), ref_trace, rel_tol=rtol)
def test_sparse_network(self, desc: ModelDesc, algo):
    """The sparsity algorithm must wrap every sparsifiable module; graph must match the reference."""
    model = desc.model_builder()
    config = get_empty_config(input_sample_sizes=desc.input_sample_sizes)
    config["compression"] = {"algorithm": algo}

    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config,
        dummy_forward_fn=desc.dummy_forward_fn,
        wrap_inputs_fn=desc.wrap_inputs_fn)

    sparsifiable = self.get_sparsifiable_modules(algo)
    expected_count = len(get_all_modules_by_type(model, sparsifiable))
    assert expected_count == len(compression_ctrl.sparsified_module_info)
    check_model_graph(compressed_model, desc.dot_filename, algo)
def test_get_all_modules_by_type__with_ignored_scope(ignored_scopes):
    """Modules whose scope starts with an ignored scope must be excluded from the result."""
    model = ModelForNameTest()
    # Search for every module type present in the model.
    all_type_names = list({module.__class__.__name__
                           for _, module in model.named_modules()})
    found = get_all_modules_by_type(model, all_type_names,
                                    ignored_scopes=ignored_scopes)
    for found_scope in found:
        for ignored in ignored_scopes:
            assert not str(found_scope).startswith(str(ignored))
def scale_signed_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    """
    Per-rank DDP worker: build a quantized squeezenet, check the post-init sum of
    quantizer scales, then run a few training iterations and dump quantizer
    (scale, signed) params before and after training for cross-rank comparison.
    """
    distributed_init_test_default(gpu, ngpus_per_node, config)
    data_loader = create_rank_dataloader(config, gpu)
    model = safe_thread_call(partial(squeezenet1_1, pretrained=True))

    config.register_extra_structs(
        [QuantizationRangeInitArgs(wrap_dataloader_for_init(data_loader))])
    quant_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    compression_scheduler = compression_ctrl.scheduler

    quant_model = post_compression_test_distr_init(compression_ctrl, config,
                                                   ngpus_per_node, quant_model)

    criterion = torch.nn.MSELoss().cuda(config.gpu)
    optimizer = torch.optim.Adam(quant_model.parameters(), lr=0.01)

    torch.backends.cudnn.benchmark = True

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    # Sum of all quantizer scales right after range init must match the
    # hard-coded reference on every rank.
    act_sum = 0
    for layer in get_all_modules_by_type(quant_model,
                                         "SymmetricQuantizer").values():
        act_sum += layer.scale.sum()
    ref_sum = 3720.864
    assert act_sum.item() == approx(ref_sum, 0.01), \
        'sum of scales is not expected {} vs {} rank {}'.format(
            act_sum.item(), ref_sum, config.rank)

    out_file_path = get_path_after_broadcast(tmp_path, config.rank)
    save_params(quant_model, out_file_path)
    compression_scheduler.step()
    # Short training run: 6 batches against a random target.
    for i, (input_, _) in enumerate(data_loader):
        if i > 5:
            break
        output = quant_model(input_)
        optimizer.zero_grad()
        dummy_target = torch.randn(1000).cuda(config.gpu, non_blocking=True)
        loss = criterion(output, dummy_target)
        compression_scheduler.step()
        loss.backward()
        optimizer.step()
    compression_scheduler.step()

    # NOTE(review): 'get_path_path_after_train_iters' looks like a doubled word in
    # the helper's name -- confirm it matches the definition elsewhere in the suite.
    out_file_path = get_path_path_after_train_iters(tmp_path, config.rank)
    save_params(quant_model, out_file_path)
def __init__(self, model: NNCFNetwork,
             weight_quantizers: Dict[WeightQuantizerId, WeightQuantizerInfo],
             constraints: HardwareQuantizationConstraints):
    """
    Split weight quantizers into adjustable and skipped groups.

    A weight quantizer is skipped when the HW constraints allow exactly one
    bitwidth for it -- there is nothing to choose during precision init.

    :param model: compressed network whose wrapped model is traversed
    :param weight_quantizers: mapping of weight quantizer ids to their infos
    :param constraints: per-quantizer-id hardware bitwidth constraints
    """
    # Node name of the module affected by each weight quantizer -> its quantizer id.
    self._wq_affected_module_node_name_vs_qid_dict = {
        k.target_node_name: k
        for k in weight_quantizers.keys()
    }
    self._quantizer_module_scope_vs_qid_dict = {
    }  # type: Dict[Scope, WeightQuantizerId]
    self._skipped_quantized_weight_node_names = []
    self._skipped_weight_quantizers = {
    }  # type: Dict[WeightQuantizerId, BaseQuantizer]
    self._weight_quantizers_in_execution_order_per_scope = OrderedDict(
    )  # type: Dict[Scope, BaseQuantizer]
    self._weight_quantizers_in_execution_order = OrderedDict(
    )  # type: Dict[WeightQuantizerId, BaseQuantizer]

    quantization_types = [
        class_type.__name__
        for class_type in QUANTIZATION_MODULES.registry_dict.values()
    ]
    weight_module_dict = model.get_nncf_wrapped_model()
    # Presumably returned in execution (traversal) order, per the naming --
    # confirm against get_all_modules_by_type.
    quantizers_in_execution_order_per_scope = get_all_modules_by_type(
        weight_module_dict, quantization_types)

    for scope, quantizer in quantizers_in_execution_order_per_scope.items(
    ):
        if self.is_wq_scope(scope):
            affected_module_scope = self.get_owning_module_scope_from_wq_scope(
                scope)
            affected_module_node = model.get_original_graph(
            ).get_op_nodes_in_scope(affected_module_scope)[0]
            if affected_module_node.node_name in self._wq_affected_module_node_name_vs_qid_dict:
                qid = self._wq_affected_module_node_name_vs_qid_dict[
                    affected_module_node.node_name]
                if len(constraints.get_all_unique_bitwidths(qid)) != 1:
                    # More than one admissible bitwidth: precision init may adjust it.
                    self._weight_quantizers_in_execution_order_per_scope[
                        scope] = quantizer
                    self._weight_quantizers_in_execution_order[
                        qid] = quantizer
                else:
                    # Exactly one admissible bitwidth: nothing to optimize, record as skipped.
                    self._skipped_quantized_weight_node_names.append(
                        affected_module_node.node_name)
                    self._skipped_weight_quantizers[qid] = quantizer
def test_autoq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    """
    AutoQ precision init must drive the DDPG agent for the configured number of
    warmup/learning steps and create one UpdatePaddingValue op per adjusted-padding
    module; the resulting bitwidth graph must match the reference.
    """
    config = params.config_builder.build()
    model = params.model_creator()
    if torch.cuda.is_available():
        model = model.cuda()
    config['log_dir'] = str(tmp_path)

    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)

    # Spy on the agent's action methods to count exploration vs exploitation steps.
    from nncf.torch.automl.agent.ddpg.ddpg import DDPG
    random_action_spy = mocker.spy(DDPG, 'random_action')
    select_action_spy = mocker.spy(DDPG, 'select_action')
    from nncf.torch.quantization.precision_init.autoq_init import AutoQPrecisionInitializer
    autoq_obj_init_spy = mocker.spy(AutoQPrecisionInitializer, '__init__')
    adjust_pad_creation_spy = mocker.spy(UpdatePaddingValue, '__init__')

    config = register_default_init_args(config,
                                        train_loader,
                                        autoq_eval_fn=lambda *x: random(),
                                        val_loader=train_loader)
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)

    bw_init_config = config['compression']['initializer']['precision']
    learning_iter_number = bw_init_config['iter_number'] - bw_init_config[
        'warmup_iter_number']

    # The initializer's __init__ receives the experimental controller as its
    # second positional argument.
    experimental_ctrl = autoq_obj_init_spy.call_args[0][1]
    n_quantizer = len(experimental_ctrl.all_quantizations)

    # Warmup: one random action per quantizer per warmup iteration.
    assert random_action_spy.call_count == bw_init_config[
        'warmup_iter_number'] * n_quantizer
    # Learning: presumably (n_quantizer + 1) select_action calls per iteration,
    # plus one per warmup iteration -- the expected count below encodes that.
    assert select_action_spy.call_count == learning_iter_number * (
        n_quantizer + 1) + bw_init_config['warmup_iter_number']

    final_num_of_adjust_pad_ops = len(
        get_all_modules_by_type(model, 'UpdatePaddingValue'))
    assert adjust_pad_creation_spy.call_count == final_num_of_adjust_pad_ops

    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__,
                                     params.config_builder.filename_suffix())
    graph_dir = os.path.join('quantized', 'autoq')
    check_bitwidth_graph(algo_ctrl, model, path_to_dot, graph_dir)
def test_sparse_quantize_network(self, desc: ModelDesc):
    """rb_sparsity + quantization pipeline: sparsified-module count and graph must match references."""
    model = desc.model_builder()
    config = get_empty_config(input_sample_sizes=desc.input_sample_sizes)
    config["compression"] = [
        {"algorithm": "rb_sparsity"},
        {"algorithm": "quantization"}
    ]
    register_bn_adaptation_init_args(config)

    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config,
        dummy_forward_fn=desc.dummy_forward_fn,
        wrap_inputs_fn=desc.wrap_inputs_fn)

    sparsifiable = self.get_sparsifiable_modules('rb_sparsity')
    expected_sparsified = len(get_all_modules_by_type(compressed_model, sparsifiable))
    # child_ctrls[0] is the sparsity controller of the composite.
    assert expected_sparsified == len(compression_ctrl.child_ctrls[0].sparsified_module_info)
    check_model_graph(compressed_model, desc.dot_filename, "quantized_rb_sparsity")
def disable_quantizer_gradients():
    """
    Build a quantized MobileNetV2 and disable all gradients except weights of
    quantized modules.

    :return: (quantizers_switcher, params_to_restore, model, compression_ctrl,
        original requires_grad flags per parameter)
    """
    config = get_quantization_config_without_range_init()
    config['input_info'] = {
        "sample_size": [2, 3, 10, 10],
    }
    register_bn_adaptation_init_args(config)
    model, compression_ctrl = create_compressed_model_and_algo_for_test(
        MobileNetV2(num_classes=10), config)
    original_requires_grad_per_param = get_requires_grad_per_param(model)

    quantization_types = [
        cls.__name__ for cls in QUANTIZATION_MODULES.registry_dict.values()
    ]
    all_quantizations = get_all_modules_by_type(model, quantization_types)
    quantizers_switcher = QuantizersSwitcher(list(all_quantizations.values()))
    params_to_restore = HAWQPrecisionInitializer.disable_all_gradients_except_weights_of_quantized_modules(
        quantizers_switcher,
        compression_ctrl.weight_quantizers,
        model,
        get_skipped_quantized_weight_node_names())
    return (quantizers_switcher, params_to_restore, model, compression_ctrl,
            original_requires_grad_per_param)
def test_get_all_modules_by_type__for_standard_type():
    """BatchNorm2d lookup finds every instance, including shared modules reachable via several scopes."""
    model = ModelForNameTest()
    found = get_all_modules_by_type(model, 'BatchNorm2d')
    found = OrderedDict((str(scope), module) for scope, module in found.items())
    expected = {
        'ModelForNameTest/BatchNorm2d[bn1]': model.bn1,
        'ModelForNameTest/BatchNorm2d[bn2]': model.bn2,
        'ModelForNameTest/BatchNorm2d[norm10]': model.norm10,
        'ModelForNameTest/BatchNorm2d[norm20]': model.norm20,
        'ModelForNameTest/Sequential[layer1]/BatchNorm2d[norm01]': model.norm10,
        'ModelForNameTest/Sequential[layer2]/BatchNorm2d[norm02]': model.norm20,
        'ModelForNameTest/Sequential[layer2]/Sequential[layer1]/BatchNorm2d[norm01]':
            model.norm10,
    }
    assert found == expected
def test_get_all_modules_by_type__for_not_exact_type():
    """A partial class-name match ('Avg' vs 'AvgPool2d') must return no modules."""
    model = ModelForNameTest()
    # Renamed from 'l' (ambiguous single-letter name, PEP 8 E741).
    found = get_all_modules_by_type(model, 'Avg')
    assert not found
def test_get_all_modules_by_type__for_subtype():
    """A name extending a real class name ('AvgPool2d_dummy') must return no modules."""
    model = ModelForNameTest()
    # Renamed from 'l' (ambiguous single-letter name, PEP 8 E741).
    found = get_all_modules_by_type(model, 'AvgPool2d_dummy')
    assert not found
def get_avg_traces(model, init_device: str):
    """Mock average Hessian traces: a random permutation of 1..N for the N Conv2d/Linear layers."""
    trace_count = len(get_all_modules_by_type(model, ['Conv2d', 'Linear']))
    return torch.randperm(trace_count).to(init_device) + 1
def get_nncf_modules_by_module_names(
        self, nncf_module_names_list: List[str]
) -> Dict["Scope", torch.nn.Module]:
    """
    Find modules of the given NNCF class names inside the wrapped model.

    :param nncf_module_names_list: NNCF module class names (e.g. 'NNCFConv2d')
    :return: dict mapping each found module's Scope to the module itself
    """
    return get_all_modules_by_type(self.get_nncf_wrapped_model(),
                                   nncf_module_names_list)
def get_nncf_modules(self) -> Dict[Scope, torch.nn.Module]:
    """Return all NNCF-wrapped modules (built-in plus user-registered) keyed by Scope."""
    user_module_names = [
        cls.__name__ for cls in NNCF_WRAPPED_USER_MODULES_DICT.values()
    ]
    return get_all_modules_by_type(self.get_nncf_wrapped_model(),
                                   NNCF_MODULES + user_module_names)