def test_ad_hoc_range_init_does_not_replace_parameter_tensors(
        self, wrap_dataloader, quant_type):
    """Range init must update parameter values in place, not swap tensor objects.

    Records the identity (``id``) of every named parameter before
    ``init_range`` and verifies each name still maps to the very same
    tensor object afterwards.
    """
    config = create_config()
    config["compression"].update({
        "activations": {"mode": quant_type},
        "weights": {"mode": quant_type},
    })
    loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(loader)])

    quant_model, quant_ctrl = create_compressed_model_and_algo_for_test(
        TwoConvTestModel(), config)

    # Snapshot tensor identities before running range initialization.
    ids_before = {name: id(tensor)
                  for name, tensor in quant_model.named_parameters()}

    quant_ctrl.init_range()

    # Every named parameter must still be the exact same tensor object.
    for name, tensor in quant_model.named_parameters():
        assert ids_before[name] == id(tensor)
def test_scale_and_sign_init_for_quant_algo__after_load_state(
        self, wrap_dataloader):
    """Loading a state dict must override range-init results for the loaded keys.

    Loads a signed flag for the 1st conv's weight quantizer and a scale for
    the 2nd conv's weight quantizer, then checks that only those entries
    changed while the remaining quantizers keep their range-init values.
    """
    config = create_config()
    data_loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    _, compressed_model = self.create_algo_and_compressed_model(config)

    ref_loaded_scale_val = torch.ones((1, 1, 1, 1)) * 100
    load_state(
        compressed_model,
        {
            # quantizer of 1st conv's weights
            'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([0.]),
            # quantizer of 2nd conv's weights
            'module.features.1.0.pre_ops.0.op.scale': ref_loaded_scale_val
        })

    self.check_sign_and_scale(
        compressed_model,
        {
            '.*Sequential\\[0\\].*UpdateWeight.*': (False, torch.ones(2, 1, 1, 1)),
            # Fixed regex typo: '. *' (space quantified by '*') -> '.*'
            '.*Sequential\\[1\\].*UpdateWeight.*': (True, ref_loaded_scale_val),
            '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
            # NOTE(review): 'input*' quantifies only the final 't'; presumably
            # 'input.*' was intended — still matches, so left as-is. TODO confirm.
            '.*activation_quantizers.*nncf_model_input*': (False, 1)
        })
def register_default_init_args(
        nncf_config: 'NNCFConfig',
        train_loader: torch.utils.data.DataLoader,
        criterion: _Loss = None,
        criterion_fn: Callable[[Any, Any, _Loss], torch.Tensor] = None,
        autoq_eval_fn: Callable[[torch.nn.Module, torch.utils.data.DataLoader], float] = None,
        autoq_eval_loader: torch.utils.data.DataLoader = None,
        device: str = None) -> 'NNCFConfig':
    """Attach the default initialization structs to *nncf_config* and return it.

    Range init and BN adaptation are always registered with the training
    loader.  Precision init is added only when a *criterion* is supplied;
    AutoQ precision init only when *autoq_eval_fn* is supplied (falling back
    to *train_loader* if no dedicated eval loader is given).
    """
    # Always-needed structs: range init and batch-norm adaptation.
    nncf_config.register_extra_structs([
        QuantizationRangeInitArgs(data_loader=train_loader, device=device),
        BNAdaptationInitArgs(data_loader=train_loader, device=device),
    ])

    if criterion:
        nncf_config.register_extra_structs([
            QuantizationPrecisionInitArgs(
                # Default loss-application wrapper when none is provided.
                criterion_fn=criterion_fn if criterion_fn else default_criterion_fn,
                criterion=criterion,
                data_loader=train_loader,
                device=device)
        ])

    if autoq_eval_fn:
        nncf_config.register_extra_structs([
            AutoQPrecisionInitArgs(
                data_loader=autoq_eval_loader if autoq_eval_loader else train_loader,
                eval_fn=autoq_eval_fn,
                nncf_config=nncf_config)
        ])

    return nncf_config
def register_default_init_args(nncf_config: 'NNCFConfig', criterion,
                               train_loader) -> 'NNCFConfig':
    """Register precision- and range-init structs on *nncf_config*; return it."""
    init_structs = [
        QuantizationPrecisionInitArgs(criterion=criterion,
                                      data_loader=train_loader),
        QuantizationRangeInitArgs(data_loader=train_loader),
    ]
    nncf_config.register_extra_structs(init_structs)
    return nncf_config
def test_per_layer_range_init_with_correct_possible_config(
        self, wrap_dataloader, per_layer_range_init_test_struct):
    """Per-layer range-init config must map each scope to its expected config."""
    config = create_config()
    config['compression']['initializer']['range'] = \
        per_layer_range_init_test_struct.range_init_config
    loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(loader)])

    ctrl, _ = self.create_algo_and_compressed_model(config)

    expected = per_layer_range_init_test_struct.expected_modules_to_init
    for scope_str, expected_range_cfg in expected.items():
        # modules_to_range_init maps scope -> (module, range_init_config)
        assert ctrl.modules_to_range_init[scope_str][1] == expected_range_cfg
def test_scale_and_sign_init_for_quant_algo__without_init_section(
        self, wrap_dataloader, config_creator):
    """Range init without an explicit 'initializer' section must still run.

    Checks the sign flags and scales established by default range init on
    both weight and activation quantizers.
    """
    config = config_creator()
    data_loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    _, compressed_model = self.create_algo_and_compressed_model(config)
    self.check_sign_and_scale(
        compressed_model, {
            '.*Sequential\\[0\\].*UpdateWeight.*': (True, 1),
            # Fixed regex typo: '. *' (space quantified by '*') -> '.*'
            '.*Sequential\\[1\\].*UpdateWeight.*': (False, 1),
            '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
            '.*activation_quantizers.*Sequential\\[1\\].*': (True, 24)
        })
def register_default_init_args(nncf_config: 'NNCFConfig',
                               train_loader,
                               criterion: _Loss = None,
                               criterion_fn: Callable[[Any, Any, _Loss], torch.Tensor] = None,
                               device='cuda') -> 'NNCFConfig':
    """Register default init structs on *nncf_config* and return it.

    Precision init is included only when a *criterion* is supplied; range
    init and BN adaptation are always registered.
    """
    structs = []
    if criterion:
        if not criterion_fn:
            # Fall back to the default loss-application wrapper.
            criterion_fn = default_criterion_fn
        structs.append(
            QuantizationPrecisionInitArgs(criterion_fn=criterion_fn,
                                          criterion=criterion,
                                          data_loader=train_loader,
                                          device=device))
    structs.append(
        QuantizationRangeInitArgs(data_loader=train_loader, device=device))
    structs.append(
        BNAdaptationInitArgs(data_loader=train_loader, device=device))
    nncf_config.register_extra_structs(structs)
    return nncf_config
def test_scope_overrides(self, wrap_dataloader):
    """Scope overrides must yield the requested quantizer mode/bits per scope.

    Three overrides are configured: asymmetric 7-bit for conv weights and
    the model input, and unsigned symmetric 7-bit for the conv activation.
    """
    config = create_config()
    config['target_device'] = 'NONE'
    config["compression"]["scope_overrides"] = {
        r"{re}NNCFConv2d\[[0-9]*\]$": {
            "bits": 7,
            "mode": "asymmetric",
        },
        "/nncf_model_input_0": {
            "bits": 7,
            "mode": "asymmetric",
        },
        r"{re}NNCFConv2d\[[0-9]*\]/conv2d_0": {
            "bits": 7,
            "signed": False,
        }
    }
    loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(loader)])
    _, compressed_model = self.create_algo_and_compressed_model(config)

    all_quantizers = get_all_modules_by_type(
        compressed_model, ['SymmetricQuantizer', 'AsymmetricQuantizer'])
    by_scope_str = {str(scope): module
                    for scope, module in all_quantizers.items()}

    # Quantizers expected to become asymmetric 7-bit via the overrides.
    asymmetric_group = [
        by_scope_str[
            "NNCFNetwork/TwoConvTestModel[nncf_module]/Sequential[features]/"
            "Sequential[0]/NNCFConv2d[0]/ModuleDict[pre_ops]/UpdateWeight[0]/"
            "AsymmetricQuantizer[op]"],
        by_scope_str[
            "NNCFNetwork/ModuleDict[activation_quantizers]/AsymmetricQuantizer"
            "[/nncf_model_input_0]"],
        by_scope_str[
            "NNCFNetwork/TwoConvTestModel[nncf_module]/Sequential[features]/"
            "Sequential[1]/NNCFConv2d[0]/ModuleDict[pre_ops]/UpdateWeight[0]/"
            "AsymmetricQuantizer[op]"],
    ]
    # Quantizer expected to stay symmetric but become unsigned.
    symmetric_group = [
        by_scope_str[
            "NNCFNetwork/ModuleDict[activation_quantizers]/"
            "SymmetricQuantizer[TwoConvTestModel/Sequential[features]"
            "/Sequential[0]/NNCFConv2d[0]/conv2d_0]"],
    ]

    for quantizer in asymmetric_group:
        assert isinstance(quantizer, AsymmetricQuantizer)
        assert quantizer.levels == 2 ** 7
    for quantizer in symmetric_group:
        assert isinstance(quantizer, SymmetricQuantizer)
        assert not quantizer.signed
def test_scale_and_sign_init_for_quant_algo__with_zero_init_steps(
        self, wrap_dataloader):
    """With zero init steps no statistics are gathered, so all quantizers
    must keep their default state: unsigned with unit scale."""
    config = create_config()
    config['compression']['initializer']['range']['num_init_steps'] = 0
    data_loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    _, compressed_model = self.create_algo_and_compressed_model(config)
    self.check_sign_and_scale(
        compressed_model, {
            '.*Sequential\\[0\\].*UpdateWeight.*': (False, 1),
            # Fixed regex typo: '. *' (space quantified by '*') -> '.*'
            '.*Sequential\\[1\\].*UpdateWeight.*': (False, 1),
            '.*activation_quantizers.*Sequential\\[0\\].*': (False, 1),
            '.*activation_quantizers.*Sequential\\[1\\].*': (False, 1)
        })
def scale_signed_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    """Distributed worker: check range-init scales and dump params twice.

    Verifies the sum of all SymmetricQuantizer scales after range init,
    saves parameters after broadcast, runs a few training iterations, and
    saves parameters again for cross-rank comparison.
    """
    distributed_init_test_default(gpu, ngpus_per_node, config)
    data_loader = create_rank_dataloader(config, gpu)
    model = safe_thread_call(partial(squeezenet1_1, pretrained=True))
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    quant_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    scheduler = compression_ctrl.scheduler
    quant_model = post_compression_test_distr_init(compression_ctrl, config,
                                                   ngpus_per_node, quant_model)

    criterion = torch.nn.MSELoss().cuda(config.gpu)
    optimizer = torch.optim.Adam(quant_model.parameters(), lr=0.01)
    torch.backends.cudnn.benchmark = True

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    scale_sum = 0
    for quantizer in get_all_modules_by_type(
            quant_model, "SymmetricQuantizer").values():
        scale_sum += quantizer.scale.sum()
    ref_sum = 4447.291
    assert scale_sum.item() == approx(ref_sum, 0.01), \
        'sum of scales is not expected {} vs {} rank {}'.format(scale_sum.item(), ref_sum, config.rank)

    save_params(quant_model, get_path_after_broadcast(tmp_path, config.rank))

    scheduler.step()
    for step, (batch, _) in enumerate(data_loader):
        if step > 5:
            break
        output = quant_model(batch)
        optimizer.zero_grad()
        dummy_target = torch.randn(1000).cuda(config.gpu, non_blocking=True)
        loss = criterion(output, dummy_target)
        scheduler.step()
        loss.backward()
        optimizer.step()
    scheduler.step()

    save_params(quant_model,
                get_path_path_after_train_iters(tmp_path, config.rank))
def test_staged_scheduler_with_range_init():
    """Staged scheduler must enable activation then weight quantizers on the
    configured epochs, with range init active from the start."""
    config = get_squeezenet_quantization_config()
    config['compression'].update({
        'params': {
            "activations_quant_start_epoch": 1,
            "weights_quant_start_epoch": 2,
        },
        'initializer': {
            'range': {
                'num_init_samples': 1
            }
        }
    })

    model = squeezenet1_1(num_classes=10, dropout=0)
    input_sample_size = create_input_infos(config)[0].shape
    data_loader = DataLoader(
        OnesDatasetMock(input_sample_size[1:]),
        batch_size=1,
        num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
        shuffle=False)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])

    model, algo = create_compressed_model_and_algo_for_test(model, config)
    scheduler = algo.scheduler
    quantizers = algo.all_quantizations.values()

    # Before any epoch step: everything disabled.
    assert all(not q.is_enabled_quantization() for q in quantizers)

    scheduler.epoch_step()
    # Epoch 0: still everything disabled.
    assert all(not q.is_enabled_quantization() for q in quantizers)

    scheduler.epoch_step()
    # Epoch 1: activations on, weights still off.
    for q in quantizers:
        if q.is_weights:
            assert not q.is_enabled_quantization()
        else:
            assert q.is_enabled_quantization()

    scheduler.epoch_step()
    # Epoch 2: everything on.
    assert all(q.is_enabled_quantization() for q in quantizers)
def test_per_layer_range_init_is_called_the_required_number_of_times(
        range_init_call_count_test_struct, mocker):
    """Each range initializer type must be constructed and fed inputs exactly
    the expected number of times for the given per-layer config."""
    config = create_config()
    config['compression']['initializer']['range'] = \
        range_init_call_count_test_struct.range_init_config
    data_loader = TestRangeInit.create_dataloader(False, config, 10)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])

    initializer_classes = {
        'min_max': nncf.quantization.init_range.MinMaxInitializer,
        'mean_min_max': nncf.quantization.init_range.MeanMinMaxInitializer,
        'three_sigma': nncf.quantization.init_range.ThreeSigmaInitializer,
    }
    # Spy on constructors first, then on register_input, for every type.
    create_spies = {name: mocker.spy(cls, '__init__')
                    for name, cls in initializer_classes.items()}
    register_spies = {name: mocker.spy(cls, 'register_input')
                      for name, cls in initializer_classes.items()}

    TestRangeInit.create_algo_and_compressed_model(config)

    expected_create = \
        range_init_call_count_test_struct.expected_call_count_initializer_create
    expected_register = \
        range_init_call_count_test_struct.expected_call_count_register_input
    for name in initializer_classes:
        assert create_spies[name].call_count == expected_create[name]
        assert register_spies[name].call_count == expected_register[name]
def test_percentile_init(quantization_mode):
    """Percentile range init must clamp ranges to the configured percentiles.

    The synthetic sample enumerates values 0..9999, so the 32.10 / 67.89
    percentiles land near 3210 / 6789 respectively.
    """
    class SyntheticDataset(torch.utils.data.Dataset):
        # One 1x100x100 sample whose entries enumerate 0..9999 row-major.
        def __init__(self):
            self._length = 1

        def __getitem__(self, idx):
            if idx >= self._length:
                raise StopIteration
            sample = torch.arange(
                100 * 100, dtype=torch.float32).reshape(1, 100, 100)
            return sample, sample

        def __len__(self):
            return self._length

    data_loader = torch.utils.data.DataLoader(SyntheticDataset(), batch_size=1)

    config_with_init = NNCFConfig()
    config_with_init.update({
        "input_info": {
            "sample_size": [1, 1, 100, 100]
        },
        "compression": {
            "algorithm": "quantization",
            "activations": {
                "mode": quantization_mode,
            },
            "weights": {
                "mode": quantization_mode,
            },
            "initializer": {
                "range": {
                    "num_init_steps": 1,
                    "type": "percentile",
                    "min_percentile": 32.10,
                    "max_percentile": 67.89
                }
            }
        }
    })
    config_with_init.register_extra_structs(
        [QuantizationRangeInitArgs(data_loader)])

    def assert_range(quantizer: BaseQuantizer):
        # Absolute tolerance is 1.0 due to percentile value interpolation
        if quantization_mode == 'symmetric':
            assert quantizer.scale.item() == approx(6789, abs=1.0)
        else:
            assert quantizer.input_low.item() == approx(3210, abs=1.0)
            assert quantizer.input_range.item() == approx(3578, abs=1.0)

    # Activations init check
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        SingleConv2dIdentityModel(), config_with_init)
    assert_range(next(iter(compression_ctrl.non_weight_quantizers.values())))

    # Weight init check
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        SingleConv2dSyntheticWeightModel(), config_with_init)
    assert_range(next(iter(compression_ctrl.non_weight_quantizers.values())))
def scale_signed_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    """Distributed worker (inline DDP setup): verify scale sums and dump params.

    Initializes the NCCL process group directly, wraps the compressed model
    in DistributedDataParallel, checks the sum of SymmetricQuantizer scales
    after range init, saves parameters, runs a few training iterations, and
    saves parameters again for cross-rank comparison.
    """
    # Per-process distributed setup.
    config.batch_size = 3
    config.workers = 3
    config.gpu = gpu
    config.ngpus_per_node = ngpus_per_node
    config.rank = gpu
    config.distributed = True
    torch.distributed.init_process_group(backend="nccl",
                                         init_method='tcp://127.0.0.1:8899',
                                         world_size=config.world_size,
                                         rank=config.rank)

    model = safe_thread_call(partial(squeezenet1_1_custom, pretrained=True))

    sample_shape = create_input_infos(config)[0].shape
    data_loader = torch.utils.data.DataLoader(
        RankDatasetMock(sample_shape[1:], config.rank),
        batch_size=3,
        num_workers=1,
        shuffle=False)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    quant_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    scheduler = compression_ctrl.scheduler

    torch.cuda.set_device(config.gpu)
    quant_model.cuda(config.gpu)
    config.batch_size = int(config.batch_size / ngpus_per_node)
    config.workers = int(config.workers / ngpus_per_node)
    quant_model = torch.nn.parallel.DistributedDataParallel(
        quant_model, device_ids=[config.gpu])
    compression_ctrl.distributed()

    criterion = torch.nn.MSELoss().cuda(config.gpu)
    optimizer = torch.optim.Adam(quant_model.parameters(), lr=0.01)
    torch.backends.cudnn.benchmark = True

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    scale_sum = 0
    for quantizer in get_all_modules_by_type(
            quant_model, "SymmetricQuantizer").values():
        scale_sum += quantizer.scale
    ref_sum = 3467.322
    assert scale_sum.item() == approx(ref_sum, 0.01), \
        'sum of scales is not expected {} vs {} rank {}'.format(scale_sum.item(), ref_sum, config.rank)

    save_params(quant_model, get_path_after_broadcast(tmp_path, config.rank))

    scheduler.step()
    for step, (batch, _) in enumerate(data_loader):
        if step > 5:
            break
        output = quant_model(batch)
        optimizer.zero_grad()
        dummy_target = torch.randn(1000).cuda(config.gpu, non_blocking=True)
        loss = criterion(output, dummy_target)
        scheduler.step()
        loss.backward()
        optimizer.step()
    scheduler.step()

    save_params(quant_model,
                get_path_path_after_train_iters(tmp_path, config.rank))
def test_percentile_init(quantization_mode: str, per_channel: bool):
    """Percentile range init must respect per-channel vs per-tensor granularity.

    The synthetic sample has three identical channels enumerating 0..9999,
    so the 32.10 / 67.89 percentiles land near 3210 / 6789 on every channel.
    """
    class SyntheticDataset(torch.utils.data.Dataset):
        # One 3x100x100 sample; each channel enumerates 0..9999 row-major.
        def __init__(self):
            self._length = 1

        def __getitem__(self, idx):
            if idx >= self._length:
                raise StopIteration
            channel = torch.arange(
                100 * 100, dtype=torch.float32).reshape(100, 100)
            sample = torch.stack([channel, channel, channel])
            return sample, sample

        def __len__(self):
            return self._length

    data_loader = torch.utils.data.DataLoader(SyntheticDataset(), batch_size=1)

    config_with_init = NNCFConfig()
    config_with_init.update({
        "input_info": {
            "sample_size": [1, 3, 100, 100]
        },
        "target_device": "NONE",
        "compression": {
            "algorithm": "quantization",
            "activations": {
                "mode": quantization_mode,
                "per_channel": per_channel
            },
            "weights": {
                "mode": quantization_mode,
                "per_channel": per_channel
            },
            "initializer": {
                "range": {
                    "num_init_steps": 1,
                    "type": "percentile",
                    "min_percentile": 32.10,
                    "max_percentile": 67.89
                }
            }
        }
    })
    config_with_init.register_extra_structs(
        [QuantizationRangeInitArgs(data_loader)])

    def check_scales(quantizer: BaseQuantizer, per_channel: bool):
        # Absolute tolerance is 1.0 due to percentile value interpolation
        expected_numel = 3 if per_channel else 1
        if quantization_mode == 'symmetric':
            assert torch.allclose(quantizer.scale,
                                  torch.ones_like(quantizer.scale) * 6789,
                                  atol=1.0)
            assert quantizer.scale.numel() == expected_numel
        else:
            assert torch.allclose(quantizer.input_low,
                                  torch.ones_like(quantizer.input_low) * 3210,
                                  atol=1.0)
            assert torch.allclose(quantizer.input_range,
                                  torch.ones_like(quantizer.input_low) * 3578,
                                  atol=1.0)
            assert quantizer.input_low.numel() == expected_numel
            assert quantizer.input_range.numel() == expected_numel

    # Activations init check
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        SingleConv2dIdentityModel(), config_with_init)
    act_quantizer_info = next(
        iter(compression_ctrl.non_weight_quantizers.values()))
    check_scales(act_quantizer_info.quantizer_module_ref, per_channel)

    # Weight init check
    synth_weight_model = SingleConv2dSyntheticWeightModel()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        synth_weight_model, config_with_init)
    weight_quantizer = next(iter(compression_ctrl.weight_quantizers.values()))
    check_scales(weight_quantizer, per_channel)