def worker(rank: int, world_size: int) -> None:
    torch.distributed.init_process_group(backend="nccl",
                                         init_method='tcp://127.0.0.1:8999',
                                         world_size=world_size,
                                         rank=rank)
    model = TestModelWithChangedTrain(freezing_stages=1)
    model.cuda()
    model.to(rank)

    nncf_config = NNCFConfig()
    nncf_config.update({
        "input_info": {
            "sample_size": [1, 1, 30, 30]
        },
        "compression": {
            "algorithm": "quantization",
            "initializer": {
                "range": {
                    "num_init_samples": 10
                },
                "batchnorm_adaptation": {
                    "num_bn_adaptation_samples": 10
                }
            }
        }
    })

    dataloader = create_random_mock_dataloader(nncf_config, num_samples=10)
    register_default_init_args(nncf_config, dataloader)

    _, compressed_model = create_compressed_model(model, nncf_config)

    # At this point the additional processes may hang
    _ = torch.nn.parallel.DistributedDataParallel(compressed_model, device_ids=[rank])
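
# A minimal launch sketch for the worker above, assuming a single-node run with
# one process per GPU; deriving world_size from device_count() is illustrative.
# torch.multiprocessing.spawn() passes the process rank as the first positional
# argument, which matches worker(rank, world_size).
if __name__ == "__main__":
    world_size = torch.cuda.device_count()
    torch.multiprocessing.spawn(worker, args=(world_size,), nprocs=world_size, join=True)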
def test_default_distributed_init_struct():
    config = get_basic_pruning_config()
    init_loader = create_ones_mock_dataloader(config)
    register_default_init_args(config, init_loader)

    dist_callbacks = config.get_extra_struct(DistributedCallbacksArgs)
    assert callable(dist_callbacks.wrap_model)
    assert callable(dist_callbacks.unwrap_model)
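
def _default_callbacks_roundtrip_sketch():
    # A hedged usage sketch, not asserted by the test above: the default
    # callbacks registered by register_default_init_args() are expected to
    # round-trip a model (wrap it for distributed execution, then unwrap it
    # back); the concrete wrapper type is an NNCF implementation detail. See
    # test_distributed_init_struct for the same round-trip with custom callbacks.
    config = get_basic_pruning_config()
    register_default_init_args(config, create_ones_mock_dataloader(config))
    dist_callbacks = config.get_extra_struct(DistributedCallbacksArgs)
    model = PruningTestModel()
    wrapped = dist_callbacks.wrap_model(model)
    return dist_callbacks.unwrap_model(wrapped)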
def test_evolution_env_setting_params():
    steps_ref = 100
    prune_target_ref = 0.1
    train_optimizer = partial(optim.Adam)
    model = PruningTestModel()

    config = create_default_legr_config()
    config['compression']['params']['legr_params'] = {}
    config['compression']['params']['legr_params']['train_steps'] = steps_ref
    config['compression']['params']['legr_params']['max_pruning'] = prune_target_ref

    train_loader = create_ones_mock_dataloader(config)
    val_loader = create_ones_mock_dataloader(config)
    train_steps_fn = lambda *x: None
    validate_fn = lambda *x: (0, 0)
    nncf_config = register_default_init_args(config,
                                             train_loader=train_loader,
                                             train_steps_fn=train_steps_fn,
                                             val_loader=val_loader,
                                             validate_fn=validate_fn,
                                             legr_train_optimizer=train_optimizer)

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, nncf_config)

    evolution_env = compression_ctrl.legr.env
    assert evolution_env.prune_target == prune_target_ref
    assert evolution_env.steps == steps_ref
    assert evolution_env.train_optimizer == train_optimizer
def test_evolution_env_default_params():
    model = PruningTestModel()
    config = create_default_legr_config()
    train_loader = create_ones_mock_dataloader(config)
    val_loader = create_ones_mock_dataloader(config)
    train_steps_fn = lambda *x: None
    validate_fn = lambda *x: (0, 0)
    nncf_config = register_default_init_args(config,
                                             train_loader=train_loader,
                                             train_steps_fn=train_steps_fn,
                                             val_loader=val_loader,
                                             validate_fn=validate_fn)

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    evolution_env = compression_ctrl.legr.env
    assert evolution_env.loss_as_reward is True
    assert evolution_env.prune_target == 0.5
    assert evolution_env.steps == 200
    assert evolution_env.train_loader == train_loader
    assert evolution_env.val_loader == val_loader
    assert evolution_env.train_fn == train_steps_fn
    assert evolution_env.validate_fn == validate_fn
    assert evolution_env.config == nncf_config
def test_can_resume_with_algo_mixing(mocker, is_strict):
    desc = TestPrecisionInitDesc().config_with_all_inits()
    all_quantization_init_spies = desc.setup_init_spies(mocker)
    sparsity_config = get_basic_sparsity_config()
    sparsity_config['target_device'] = 'TRIAL'
    config = desc.config
    quantization_section = config['compression']
    config['compression'] = [{'algorithm': 'const_sparsity'}, quantization_section]

    _, compression_ctrl = create_compressed_model_and_algo_for_test(desc.model_creator(), sparsity_config)
    compression_state = compression_ctrl.get_compression_state()

    config = register_default_init_args(config, train_loader=create_ones_mock_dataloader(config))
    fn = partial(create_compressed_model_and_algo_for_test,
                 desc.model_creator(), config,
                 compression_state=compression_state)
    if is_strict:
        with pytest.raises(RuntimeError):
            fn()
    else:
        _, compression_ctrl = fn()
        for m in all_quantization_init_spies:
            m.assert_called()
        desc.check_precision_init(compression_ctrl.child_ctrls[1])
def test_default_legr_init_struct():
    config = get_basic_pruning_config()
    init_loader = create_ones_mock_dataloader(config)
    nncf_config = register_default_init_args(config, init_loader)

    with pytest.raises(KeyError):
        nncf_config.get_extra_struct(LeGRInitArgs)
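
def _legr_struct_registration_sketch():
    # A hedged sketch (mirroring test_valid_legr_init_struct below): once the
    # LeGR-specific arguments are passed, register_default_init_args() attaches
    # the LeGRInitArgs struct and get_extra_struct() no longer raises KeyError.
    config = get_basic_pruning_config()
    loader = create_ones_mock_dataloader(config)
    nncf_config = register_default_init_args(config,
                                             train_loader=loader,
                                             train_steps_fn=lambda *x: None,
                                             val_loader=loader,
                                             validate_fn=lambda *x: (0, 0))
    return nncf_config.get_extra_struct(LeGRInitArgs)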
def test_hawq_hw_vpu_config_e2e(_seed, dataset_dir, tmp_path):
    config = HAWQConfigBuilder().for_vpu().liberal_mode().with_ratio(1.5).build()
    model = MobileNetV2(num_classes=10)
    criterion = nn.CrossEntropyLoss()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)
    config = register_default_init_args(config, train_loader, criterion=criterion)
    create_compressed_model_and_algo_for_test(model, config)
def test_staged_scheduler_with_hawq():
    config = get_squeezenet_quantization_config()
    config['compression'].update({
        'params': {
            "activations_quant_start_epoch": 1,
            "weights_quant_start_epoch": 2,
        },
        'initializer': {
            'range': {
                'num_init_samples': 1
            },
            'precision': {
                "type": "hawq",
                "num_data_points": 1,
                "iter_number": 1,
                "tolerance": 1
            }
        }
    })

    num_classes = 10
    model = squeezenet1_1(num_classes=num_classes, dropout=0)

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = DataLoader(HawqDatasetMock(input_sample_size[1:], num_classes),
                             batch_size=1,
                             num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
                             shuffle=False)
    criterion = nn.CrossEntropyLoss().cuda()
    config = register_default_init_args(config, data_loader, criterion=criterion)

    model, algo = create_compressed_model_and_algo_for_test(model, config)
    scheduler = algo.scheduler

    # Before activations_quant_start_epoch: all quantizers are disabled
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()
    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    # activations_quant_start_epoch reached: activation quantizers are enabled,
    # weight quantizers are still disabled
    scheduler.epoch_step()
    for wq_info in algo.weight_quantizers.values():
        assert not wq_info.quantizer_module_ref.is_enabled_quantization()
    for aq_info in algo.non_weight_quantizers.values():
        assert aq_info.quantizer_module_ref.is_enabled_quantization()

    # weights_quant_start_epoch reached: all quantizers are enabled
    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert module.is_enabled_quantization()
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    config_builder = params.config_builder
    config = config_builder.build()
    model = params.model_creator()
    if torch.cuda.is_available():
        model = model.cuda()
        pregen_device = 'cuda'
    else:
        pregen_device = 'cpu'

    pregen_traces_for_all_layers = params.avg_traces_creator(model, pregen_device)
    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)
    config = register_default_init_args(config, train_loader, criterion=criterion)

    mocked_trace = mocker.patch(
        'nncf.torch.quantization.hessian_trace.HessianTraceEstimator.get_average_traces',
        autospec=True)
    ratio_list_spy = mocker.spy(HAWQPrecisionInitializer, 'get_compression_ratio_per_qconfig_sequence')
    chosen_index_spy = mocker.spy(HAWQPrecisionInitializer, 'choose_qconfig_sequence')

    # Fewer traces may need to be calculated during HAWQ than there are weightable layers.
    def side_effect_fn(self, max_iter=500, tolerance=1e-5):
        # pylint:disable=protected-access
        return pregen_traces_for_all_layers[:len(self._parameter_handler.parameters)]

    mocked_trace.side_effect = side_effect_fn
    model, ctrl = create_compressed_model_and_algo_for_test(model, config)

    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__, config_builder.filename_suffix())
    graph_dir = os.path.join('quantized', 'hawq')
    check_bitwidth_graph(ctrl, model, path_to_dot, graph_dir, add_flops=config_builder.should_add_flops)

    if config_builder.compression_ratio:
        ratio_list = ratio_list_spy.spy_return
        index = chosen_index_spy.spy_return
        assert config_builder.compression_ratio == ratio_list[index]
def get_model_and_controller_for_legr_test():
    model = PruningTestModel()
    config = create_default_legr_config()
    train_loader = create_ones_mock_dataloader(config)
    val_loader = create_ones_mock_dataloader(config)
    train_steps_fn = lambda *x: None
    validate_fn = lambda *x: (0, 0)
    nncf_config = register_default_init_args(config,
                                             train_loader=train_loader,
                                             train_steps_fn=train_steps_fn,
                                             val_loader=val_loader,
                                             validate_fn=validate_fn)
    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    return nncf_config, compressed_model, compression_ctrl
def nncf_config_with_default_init_args_(mocker):
    config = NNCFConfig.from_dict(CONFIG_WITH_ALL_INIT_TYPES)

    train_loader = DataLoader(OnesDatasetMock(INPUT_SAMPLE_SIZE[1:]),
                              batch_size=1,
                              num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
                              shuffle=False)
    mocker_criterion = mocker.stub()
    mocker_criterion.batch_size = 1

    config = register_default_init_args(config, train_loader, criterion=mocker_criterion)
    return config
def test_valid_legr_init_struct():
    config = get_basic_pruning_config()
    train_loader = create_ones_mock_dataloader(config)
    val_loader = create_ones_mock_dataloader(config)
    train_steps_fn = lambda *x: None
    validate_fn = lambda *x: (0, 0, 0)
    nncf_config = register_default_init_args(config,
                                             train_loader=train_loader,
                                             train_steps_fn=train_steps_fn,
                                             val_loader=val_loader,
                                             validate_fn=validate_fn)

    legr_init_args = config.get_extra_struct(LeGRInitArgs)
    assert legr_init_args.config == nncf_config
    assert legr_init_args.train_loader == train_loader
    assert legr_init_args.val_loader == val_loader
    assert legr_init_args.train_steps_fn == train_steps_fn
def test_autoq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    config = params.config_builder.build()
    model = params.model_creator()
    if torch.cuda.is_available():
        model = model.cuda()

    config['log_dir'] = str(tmp_path)
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)

    from nncf.torch.automl.agent.ddpg.ddpg import DDPG
    random_action_spy = mocker.spy(DDPG, 'random_action')
    select_action_spy = mocker.spy(DDPG, 'select_action')
    from nncf.torch.quantization.precision_init.autoq_init import AutoQPrecisionInitializer
    autoq_obj_init_spy = mocker.spy(AutoQPrecisionInitializer, '__init__')
    adjust_pad_creation_spy = mocker.spy(UpdatePaddingValue, '__init__')

    config = register_default_init_args(config, train_loader,
                                        autoq_eval_fn=lambda *x: random(),
                                        val_loader=train_loader)
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)

    bw_init_config = config['compression']['initializer']['precision']
    learning_iter_number = bw_init_config['iter_number'] - bw_init_config['warmup_iter_number']
    experimental_ctrl = autoq_obj_init_spy.call_args[0][1]
    n_quantizer = len(experimental_ctrl.all_quantizations)

    assert random_action_spy.call_count == bw_init_config['warmup_iter_number'] * n_quantizer
    assert select_action_spy.call_count == \
           learning_iter_number * (n_quantizer + 1) + bw_init_config['warmup_iter_number']

    final_num_of_adjust_pad_ops = len(get_all_modules_by_type(model, 'UpdatePaddingValue'))
    assert adjust_pad_creation_spy.call_count == final_num_of_adjust_pad_ops

    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__,
                                     params.config_builder.filename_suffix())
    graph_dir = os.path.join('quantized', 'autoq')
    check_bitwidth_graph(algo_ctrl, model, path_to_dot, graph_dir)
def test_hawq_manual_configs(manual_config_params):
    # Tip: if you changed the quantized NNCFGraph and this test fails with an error like
    # `Could not find a quantization point at scope name...`, check and correct the configs
    # with hardcoded layer names (the bitwidth_per_scope attribute)
    config = manual_config_params.create_nncf_config()
    config = register_default_init_args(config, create_ones_mock_dataloader(config), criterion=None)
    model = manual_config_params.create_model(config['model'])

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    nncf_stats = compression_ctrl.statistics()
    expected = manual_config_params.bit_stats
    actual = nncf_stats.quantization

    assert expected.num_wq_per_bitwidth == actual.num_wq_per_bitwidth
    assert expected.num_aq_per_bitwidth == actual.num_aq_per_bitwidth
def test_model_can_be_loaded_with_resume(_params):
    p = _params
    sample_config_path = p['sample_config_path']
    checkpoint_path = p['checkpoint_path']

    config = SampleConfig.from_json(str(sample_config_path))
    nncf_config = NNCFConfig.from_json(str(sample_config_path))

    config.execution_mode = p['execution_mode']
    config.current_gpu = 0
    config.device = get_device(config)
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        config.dist_url = "tcp://127.0.0.1:9898"
        config.dist_backend = "nccl"
        config.rank = 0
        config.world_size = 1
        configure_distributed(config)

    model_name = config['model']
    model = load_model(model_name,
                       pretrained=False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    nncf_config = register_default_init_args(nncf_config,
                                             train_loader=create_ones_mock_dataloader(nncf_config))

    model.to(config.device)
    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, nncf_config)
    model, _ = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
def test_distributed_init_struct():
    class FakeModelClass:
        def __init__(self, model_: nn.Module):
            self.model = model_

        def unwrap(self):
            return self.model

    config = get_basic_pruning_config()
    init_loader = create_ones_mock_dataloader(config)
    wrapper_callback = FakeModelClass
    unwrapper_callback = lambda x: x.unwrap()
    nncf_config = register_default_init_args(config, init_loader,
                                             distributed_callbacks=(wrapper_callback,
                                                                    unwrapper_callback))

    dist_callbacks = nncf_config.get_extra_struct(DistributedCallbacksArgs)
    model = PruningTestModel()
    wrapped_model = dist_callbacks.wrap_model(model)
    assert isinstance(wrapped_model, FakeModelClass)
    unwrapped_model = dist_callbacks.unwrap_model(wrapped_model)
    assert unwrapped_model == model
def precision_init_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    distributed_init_test_default(gpu, ngpus_per_node, config)
    data_loader = create_rank_dataloader(config, gpu)
    model = safe_thread_call(partial(mobilenet_v2, pretrained=True))
    model.eval()

    criterion = torch.nn.MSELoss().cuda(config.gpu)
    config = register_default_init_args(config, data_loader,
                                        criterion=criterion,
                                        autoq_eval_fn=lambda *x: 0,
                                        val_loader=data_loader)
    quant_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    quant_model = post_compression_test_distr_init(compression_ctrl, config, ngpus_per_node,
                                                   quant_model)

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    act_bitwidth_per_scope = get_bitwidth_per_scope(quant_model.module)
    out_file_path = get_path_to_bitwidth_dump(tmp_path, config.rank)
    torch.save(act_bitwidth_per_scope, str(out_file_path))
def test_can_resume_with_manual_init(mocker, desc, _nncf_caplog):
    config = desc.config
    config_to_resume = desc.config_to_resume
    config = register_default_init_args(config, train_loader=create_ones_mock_dataloader(config))

    all_spies = desc.setup_init_spies(mocker)
    init_spy = mocker.spy(PTCompressionAlgorithmBuilder, '__init__')
    get_setup_spy = mocker.spy(QuantizationBuilder, '_get_quantizer_setup')

    _, compression_ctrl = create_compressed_model_and_algo_for_test(desc.model_creator(), config)

    desc.check_precision_init(compression_ctrl)
    for m in all_spies:
        m.assert_called()
        m.reset_mock()
    get_setup_spy.assert_called()
    get_setup_spy.reset_mock()

    compression_state = compression_ctrl.get_compression_state()
    register_bn_adaptation_init_args(config_to_resume)
    _, compression_ctrl = create_compressed_model_and_algo_for_test(desc.model_creator(),
                                                                    config_to_resume,
                                                                    compression_state=compression_state)

    if config_to_resume is not None and config_to_resume['compression']['initializer']:
        assert not init_spy.call_args[0][2]
    for m in all_spies:
        m.assert_not_called()
    get_setup_spy.assert_not_called()

    desc.check_precision_init(compression_ctrl)
def wrap_nncf_model(model, cfg, data_loader_for_init=None, get_fake_input_func=None, export=False):
    """
    Wraps an mmaction model with NNCF.

    Note that the parameter `get_fake_input_func` should be the function `get_fake_input`
    -- it cannot be imported here explicitly.
    """
    check_nncf_is_enabled()

    from nncf.config import NNCFConfig
    from nncf.torch import create_compressed_model, register_default_init_args
    from nncf.torch.dynamic_graph.io_handling import nncf_model_input
    from nncf.torch.dynamic_graph.trace_tensor import TracedTensor
    from nncf.torch.initialization import DefaultInitializingDataLoader

    class MMInitializeDataLoader(DefaultInitializingDataLoader):
        def get_inputs(self, dataloader_output):
            return (), dataloader_output

    pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True)
    nncf_config = NNCFConfig(cfg.nncf_config)
    logger = get_root_logger(cfg.log_level)

    if data_loader_for_init:
        wrapped_loader = MMInitializeDataLoader(data_loader_for_init)
        nncf_config = register_default_init_args(nncf_config, wrapped_loader,
                                                 device=next(model.parameters()).device)

    if cfg.get('resume_from'):
        checkpoint_path = cfg.get('resume_from')
        assert is_checkpoint_nncf(checkpoint_path), (
            'It is possible to resume training with NNCF compression from NNCF checkpoints only. '
            'Use "load_from" with a non-compressed model for further compression by NNCF.')
    elif cfg.get('load_from'):
        checkpoint_path = cfg.get('load_from')
        if not is_checkpoint_nncf(checkpoint_path):
            checkpoint_path = None
            logger.info('Received a non-NNCF checkpoint to start training '
                        '-- initialization of NNCF fields will be done')
    else:
        checkpoint_path = None

    if not data_loader_for_init and not checkpoint_path:
        raise RuntimeError('Either data_loader_for_init or an NNCF pre-trained '
                           'model checkpoint should be set')

    if checkpoint_path:
        logger.info(f'Loading NNCF checkpoint from {checkpoint_path}')
        logger.info('Please note that this first loading is made before the addition of '
                    'NNCF FakeQuantize nodes to the model, so there may be some '
                    'warnings on unexpected keys')
        resuming_state_dict = load_checkpoint(model, checkpoint_path)
        logger.info(f'Loaded NNCF checkpoint from {checkpoint_path}')
    else:
        resuming_state_dict = None

    if "nncf_compress_postprocessing" in cfg:
        # NB: This parameter chooses whether NNCF compression is applied to the whole
        # model graph including postprocessing (`nncf_compress_postprocessing=True`)
        # or only to the part of the model without postprocessing
        # (`nncf_compress_postprocessing=False`).
        # The primary goal is to compress as big a part of the model as possible, so
        # `nncf_compress_postprocessing=True` is the primary choice, whereas
        # `nncf_compress_postprocessing=False` is the fallback.
        # When NNCF compression is enabled for sufficiently many models,
        # we should keep one choice only.
        nncf_compress_postprocessing = cfg.get('nncf_compress_postprocessing')
        logger.debug('set should_compress_postprocessing='
                     f'{nncf_compress_postprocessing}')
    else:
        nncf_compress_postprocessing = True

    def _get_fake_data_for_forward(cfg, nncf_config, get_fake_input_func):
        input_size = nncf_config.get("input_info").get('sample_size')
        assert get_fake_input_func is not None
        assert len(input_size) == 4 and input_size[0] == 1
        H, W, C = input_size[2], input_size[3], input_size[1]
        device = next(model.parameters()).device
        with no_nncf_trace():
            return get_fake_input_func(cfg, orig_img_shape=tuple([H, W, C]), device=device)

    def dummy_forward(model):
        fake_data = _get_fake_data_for_forward(cfg, nncf_config, get_fake_input_func)
        img = fake_data["imgs"]
        img = nncf_model_input(img)
        if export:
            img, _, _ = model.reshape_input(imgs=img)
            model(imgs=img)
        else:
            model(imgs=img, return_loss=False)

    def wrap_inputs(args, kwargs):
        # during dummy_forward
        if not len(kwargs):
            if not isinstance(args[0][0], TracedTensor):
                args[0][0] = nncf_model_input(args[0][0])
            return args, kwargs

        # during building of the original graph
        if not kwargs.get('return_loss') and kwargs.get('forward_export'):
            return args, kwargs

        # during the model's forward
        assert 'imgs' in kwargs, 'During model forward, imgs must be in kwargs'
        img = kwargs['imgs']
        if isinstance(img, list):
            assert len(img) == 1, 'Input list must have length 1'
            assert torch.is_tensor(img[0]), 'Input for the model must be a tensor'
            if not isinstance(img[0], TracedTensor):
                img[0] = nncf_model_input(img[0])
        else:
            assert torch.is_tensor(img), 'Input for the model must be a tensor'
            if not isinstance(img, TracedTensor):
                img = nncf_model_input(img)
        kwargs['imgs'] = img
        return args, kwargs

    model.dummy_forward_fn = dummy_forward

    if 'log_dir' in nncf_config:
        os.makedirs(nncf_config['log_dir'], exist_ok=True)

    compression_ctrl, model = create_compressed_model(model,
                                                      nncf_config,
                                                      dummy_forward_fn=dummy_forward,
                                                      wrap_inputs_fn=wrap_inputs,
                                                      compression_state=resuming_state_dict)
    return compression_ctrl, model
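
def _wrap_nncf_model_usage_sketch(model, train_data_loader, get_fake_input_func):
    # A hedged usage sketch for wrap_nncf_model() above, not part of the original
    # module: the cfg fields the function reads (nncf_config, work_dir, log_level)
    # must be present in the mmcv config; the config file path here is hypothetical,
    # and get_fake_input_func is the mmaction `get_fake_input` function mentioned
    # in the docstring, passed in by the caller.
    from mmcv import Config
    cfg = Config.fromfile('configs/my_action_model_nncf_int8.py')  # hypothetical path
    compression_ctrl, model = wrap_nncf_model(model, cfg,
                                              data_loader_for_init=train_data_loader,
                                              get_fake_input_func=get_fake_input_func)
    return compression_ctrl, model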