def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    config = params.config_builder.build()
    model = params.model_creator().cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)
    config = register_default_init_args(config, train_loader, criterion)

    mocked_trace = mocker.patch(
        'nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces',
        autospec=True)
    pregen_traces_for_all_layers = params.avg_traces_creator(model, 'cuda')

    # There may be fewer traces required to be calculated during HAWQ than there are weightable layers.
    def side_effect_fn(self, max_iter=500, tolerance=1e-5):
        # pylint:disable=protected-access
        return pregen_traces_for_all_layers[:len(self._parameter_handler.parameters)]

    mocked_trace.side_effect = side_effect_fn
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)

    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__,
                                     params.config_builder.filename_suffix())
    graph_dir = os.path.join('quantized', 'hawq')
    check_bitwidth_graph(algo_ctrl, model, path_to_dot, graph_dir)
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker, config_creator: Callable, filename_suffix: str):
    num_data_points = 100
    batch_size = 10
    config = config_creator(batch_size, num_data_points)
    model = MobileNetV2(num_classes=10)
    model.eval()
    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config.get("model_size"), dataset_dir, batch_size)
    config = register_default_init_args(config, criterion, train_loader)

    mocked_trace = mocker.patch(
        'nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces')
    mock_avg_traces = get_mock_avg_traces(model)
    mocked_trace.return_value = mock_avg_traces

    from torchvision.models.mobilenet import model_urls
    load_state(model, model_zoo.load_url(model_urls['mobilenet_v2']))
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)
    model = model.cuda()

    all_quantizers_per_full_scope = get_all_quantizers_per_full_scope(model)
    graph = get_bitwidth_graph(algo_ctrl, model, all_quantizers_per_full_scope)

    path_to_dot = 'mobilenet_v2_mixed_bitwidth_graph_{}.dot'.format(filename_suffix)
    check_graph(graph, path_to_dot, os.path.join('quantized', 'hawq'), sort_dot_graph=False)
def test_hawq_hw_vpu_config_e2e(_seed, dataset_dir, tmp_path):
    config = HAWQConfigBuilder().for_vpu().with_ratio(1.01).build()
    model = MobileNetV2(num_classes=10)
    criterion = nn.CrossEntropyLoss()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)
    config = register_default_init_args(config, train_loader, criterion)
    create_compressed_model_and_algo_for_test(model, config)
def test_hawq_raises_error_if_method_returns_none(mocker, method_name):
    config = create_hawq_test_config()
    model = MockModel()
    config = register_default_init_args(config, mocker.stub(), mocker.stub())
    mocker.patch('nncf.quantization.algo.QuantizationController._do_range_init')
    mocker.patch('nncf.quantization.init_precision.HAWQPrecisionInitializer._calc_traces')

    mocked_trace = mocker.patch('nncf.quantization.init_precision.HAWQPrecisionInitializer.' + method_name)
    mocked_trace.return_value = None

    with pytest.raises(RuntimeError):
        create_compressed_model_and_algo_for_test(model, config)
def test_staged_scheduler_with_hawq():
    config = get_squeezenet_quantization_config()
    config['compression'].update({
        'params': {
            "activations_quant_start_epoch": 1,
            "weights_quant_start_epoch": 2,
        },
        'initializer': {
            'range': {
                'num_init_samples': 1
            },
            'precision': {
                "type": "hawq",
                "num_data_points": 1,
                "iter_number": 1,
                "tolerance": 1
            }
        }
    })

    num_classes = 10
    model = squeezenet1_1(num_classes=num_classes, dropout=0)

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = DataLoader(
        HawqDatasetMock(input_sample_size[1:], num_classes),
        batch_size=1,
        num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
        shuffle=False)
    criterion = nn.CrossEntropyLoss().cuda()
    config = register_default_init_args(config, data_loader, criterion)

    model, algo = create_compressed_model_and_algo_for_test(model, config)
    scheduler = algo.scheduler

    # Before any epoch_step: all quantizers are disabled.
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()  # epoch 0: still everything disabled
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()  # epoch 1: activation quantizers become enabled
    for module in algo.all_quantizations.values():
        if module.is_weights:
            assert not module.is_enabled_quantization()
        else:
            assert module.is_enabled_quantization()

    scheduler.epoch_step()  # epoch 2: weight quantizers become enabled as well
    for module in algo.all_quantizations.values():
        assert module.is_enabled_quantization()
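# The test above relies on a HawqDatasetMock helper whose definition is not part of
# this section. Below is a minimal sketch of what such a mock could look like (the
# actual NNCF test helper may differ): a dataset yielding random inputs paired with
# random class labels, so that the criterion and the Hessian trace estimation have
# data to run on.
import torch
from torch.utils.data import Dataset

class HawqDatasetMock(Dataset):
    def __init__(self, input_size, num_classes, length=10):
        super().__init__()
        self._input_size = input_size  # e.g. (3, 224, 224), without the batch dim
        self._num_classes = num_classes
        self._length = length

    def __getitem__(self, index):
        # A random input tensor and a random integer target for CrossEntropyLoss.
        return torch.rand(self._input_size), torch.randint(0, self._num_classes, (1,)).item()

    def __len__(self):
        return self._length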
def nncf_config_with_default_init_args_(mocker):
    config = NNCFConfig.from_dict(CONFIG_WITH_ALL_INIT_TYPES)

    train_loader = DataLoader(
        OnesDatasetMock(INPUT_SAMPLE_SIZE[1:]),
        batch_size=1,
        num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
        shuffle=False)
    mocker_criterion = mocker.stub()
    mocker_criterion.batch_size = 1

    config = register_default_init_args(config, train_loader, mocker_criterion)
    return config
def test_hawq_behaviour__if_method_returns_none(mocker, method_name, expected_behavior):
    config = HAWQConfigBuilder().build()
    config['quantizer_setup_type'] = 'pattern_based'
    model = MockModel()
    config = register_default_init_args(config, mocker.stub(), mocker.stub())
    mocker.patch('nncf.quantization.algo.QuantizationController._do_range_init')
    mocker.patch('nncf.quantization.precision_init.hawq_init.HAWQPrecisionInitializer._calc_traces')

    mocked_trace = mocker.patch('nncf.quantization.precision_init.hawq_init.HAWQPrecisionInitializer.' + method_name)
    mocked_trace.return_value = None

    with expected_behavior:
        create_compressed_model_and_algo_for_test(model, config)
def test_hawq_manual_configs(manual_config_params):
    config_name, bit_stats = manual_config_params
    config = NNCFConfig.from_json(
        str(EXAMPLES_DIR.joinpath('classification', 'configs', 'quantization') / config_name))
    config['quantizer_setup_type'] = 'pattern_based'
    config = register_default_init_args(config, train_loader=create_mock_dataloader(config), criterion=None)
    model = load_model(config['model'], pretrained=False)
    model.eval()

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    table = compression_ctrl.non_stable_metric_collectors[0].get_bits_stat()
    # pylint: disable=protected-access
    assert table._rows == bit_stats
def test_hawq_manual_configs(manual_config_params, hw_config):
    config_name, bit_stats = manual_config_params
    config = NNCFConfig.from_json(
        str(EXAMPLES_DIR.joinpath('classification', 'configs', 'quantization') / config_name))
    config = register_default_init_args(config, criterion=None, train_loader=create_mock_dataloader(config))
    if hw_config:
        config['hw_config'] = hw_config.value
    model = load_model(config['model'], pretrained=False)
    model.eval()

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    table = compression_ctrl.get_bit_stats()
    # pylint: disable=protected-access
    assert table._rows == bit_stats
def hawq_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    data_loader = distributed_init_test_default(gpu, ngpus_per_node, config)
    model = safe_thread_call(partial(mobilenet_v2, pretrained=True))
    model.eval()
    criterion = torch.nn.MSELoss().cuda(config.gpu)
    config = register_default_init_args(config, criterion, data_loader)
    quant_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    quant_model = post_compression_test_distr_init(compression_ctrl, config, ngpus_per_node, quant_model)

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    act_bitwidth_per_scope = get_bitwidth_per_scope(quant_model.module)
    out_file_path = get_path_to_bitwidth_dump(tmp_path, config.rank)
    torch.save(act_bitwidth_per_scope, str(out_file_path))
def hawq_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    config.batch_size = 3
    config.workers = 3
    config.gpu = gpu
    config.ngpus_per_node = ngpus_per_node
    config.rank = gpu
    config.distributed = True

    torch.distributed.init_process_group(backend="nccl",
                                         init_method='tcp://127.0.0.1:8899',
                                         world_size=config.world_size,
                                         rank=config.rank)

    model = safe_thread_call(partial(mobilenet_v2, pretrained=True))
    model.eval()

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = torch.utils.data.DataLoader(RankDatasetMock(input_sample_size[1:], config.rank),
                                              batch_size=3,
                                              num_workers=1,
                                              shuffle=False)
    criterion = torch.nn.MSELoss().cuda(config.gpu)
    config = register_default_init_args(config, criterion, data_loader)
    quant_model, compression_algo = create_compressed_model_and_algo_for_test(model, config)

    torch.cuda.set_device(config.gpu)
    quant_model.cuda(config.gpu)
    config.batch_size = int(config.batch_size / ngpus_per_node)
    config.workers = int(config.workers / ngpus_per_node)
    quant_model = torch.nn.parallel.DistributedDataParallel(quant_model, device_ids=[config.gpu])

    compression_algo.distributed()

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    act_bitwidth_per_scope = get_bitwidth_per_scope(quant_model.module)
    out_file_path = get_path_to_bitwidth_dump(tmp_path, config.rank)
    torch.save(act_bitwidth_per_scope, str(out_file_path))
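# RankDatasetMock is referenced above but not defined in this section. A plausible
# minimal sketch (the actual test helper may differ): every sample is a constant
# tensor derived from the process rank, so each distributed worker feeds distinct,
# deterministic data into initialization and the dumped bitwidths can be compared
# across ranks.
import torch
from torch.utils.data import Dataset

class RankDatasetMock(Dataset):
    def __init__(self, input_size, rank, length=100):
        super().__init__()
        self._input_size = input_size
        self._rank = rank
        self._length = length

    def __getitem__(self, index):
        # Rank-dependent input plus a fixed dummy target.
        dummy_input = torch.ones(self._input_size) * self._rank
        dummy_target = torch.zeros(1)
        return dummy_input, dummy_target

    def __len__(self):
        return self._length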
def test_autoq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    config = params.config_builder.build()
    model = params.model_creator().cuda()

    config['log_dir'] = str(tmp_path)
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)

    from nncf.automl.agent.ddpg.ddpg import DDPG
    random_action_spy = mocker.spy(DDPG, 'random_action')
    select_action_spy = mocker.spy(DDPG, 'select_action')

    from nncf.quantization.precision_init.autoq_init import AutoQPrecisionInitializer
    autoq_obj_init_spy = mocker.spy(AutoQPrecisionInitializer, '__init__')

    config = register_default_init_args(config, train_loader=train_loader,
                                        autoq_eval_fn=lambda *x: random(),
                                        autoq_eval_loader=train_loader)
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)

    bw_init_config = config['compression']['initializer']['precision']
    learning_iter_number = bw_init_config['iter_number'] - bw_init_config['warmup_iter_number']

    experimental_ctrl = autoq_obj_init_spy.call_args[0][1]
    n_quantizer = len(experimental_ctrl.all_quantizations)

    assert random_action_spy.call_count == bw_init_config['warmup_iter_number'] * n_quantizer
    assert select_action_spy.call_count == \
        learning_iter_number * (n_quantizer + 1) + bw_init_config['warmup_iter_number']

    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__,
                                     params.config_builder.filename_suffix())
    graph_dir = os.path.join('quantized', 'autoq')
    check_bitwidth_graph(algo_ctrl, model, path_to_dot, graph_dir)
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker, params):
    config = params.config_builder.build()
    model = params.model_creator()
    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config, dataset_dir)
    config = register_default_init_args(config, train_loader, criterion)

    mocked_trace = mocker.patch('nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces')
    mocked_trace.return_value = params.avg_traces_creator(model, 'cuda')
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)
    model = model.cuda()

    all_quantizers_per_full_scope = HAWQDebugger.get_all_quantizers_per_full_scope(model)
    quantizer_switcher = QuantizersSwitcher(list(all_quantizers_per_full_scope.values()))
    # the graph may not contain some quantizers (e.g. in a staged scenario)
    quantizer_switcher.enable_quantizers()
    model.rebuild_graph()

    groups_of_adjacent_quantizers = GroupsOfAdjacentQuantizers(algo_ctrl)
    graph = HAWQDebugger.get_bitwidth_graph(algo_ctrl, model,
                                            all_quantizers_per_full_scope,
                                            groups_of_adjacent_quantizers)
    path_to_dot = '{}_{}.dot'.format(params.model_creator.__name__,
                                     params.config_builder.filename_suffix())
    check_graph(graph, path_to_dot, os.path.join('quantized', 'hawq'), sort_dot_graph=False)
def wrap_nncf_model(model, cfg, data_loader_for_init=None, get_fake_input_func=None):
    """
    The function wraps an mmdet model with NNCF.
    Note that the parameter `get_fake_input_func` should be the function `get_fake_input`
    -- it cannot be imported here explicitly.
    """
    check_nncf_is_enabled()

    pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True)
    nncf_config = NNCFConfig(cfg.nncf_config)
    logger = get_root_logger(cfg.log_level)

    if data_loader_for_init:
        wrapped_loader = MMInitializeDataLoader(data_loader_for_init)
        nncf_config = register_default_init_args(nncf_config, None, wrapped_loader)

    if cfg.get('resume_from'):
        checkpoint_path = cfg.get('resume_from')
        assert is_checkpoint_nncf(checkpoint_path), (
            'It is possible to resume training with NNCF compression from NNCF checkpoints only. '
            'Use "load_from" with a non-compressed model for further compression by NNCF.')
    elif cfg.get('load_from'):
        checkpoint_path = cfg.get('load_from')
        if not is_checkpoint_nncf(checkpoint_path):
            checkpoint_path = None
            logger.info('Received a non-NNCF checkpoint to start training from '
                        '-- initialization of NNCF fields will be done')
    else:
        checkpoint_path = None

    if not data_loader_for_init and not checkpoint_path:
        raise RuntimeError('Either data_loader_for_init or an NNCF pre-trained '
                           'model checkpoint should be set')

    if checkpoint_path:
        logger.info(f'Loading NNCF checkpoint from {checkpoint_path}')
        logger.info('Please note that this first loading is made before the addition of '
                    'NNCF FakeQuantize nodes to the model, so there may be some '
                    'warnings on unexpected keys')
        resuming_state_dict = load_checkpoint(model, checkpoint_path)
        logger.info(f'Loaded NNCF checkpoint from {checkpoint_path}')
    else:
        resuming_state_dict = None

    if "nncf_compress_postprocessing" in cfg:
        # NB: This parameter is used to choose whether we should try to make NNCF compression
        # for the whole model graph including postprocessing (`nncf_compress_postprocessing=True`),
        # or make NNCF compression of the part of the model without postprocessing
        # (`nncf_compress_postprocessing=False`).
        # Our primary goal is to compress as big a part of the model as possible, so
        # `nncf_compress_postprocessing=True` is our primary choice, whereas
        # `nncf_compress_postprocessing=False` is our fallback decision.
        # When we manage to enable NNCF compression for sufficiently many models,
        # we should keep one choice only.
        nncf_compress_postprocessing = cfg.get('nncf_compress_postprocessing')
        logger.debug('set should_compress_postprocessing='
                     f'{nncf_compress_postprocessing}')
    else:
        nncf_compress_postprocessing = True

    def _get_fake_data_for_forward(cfg, nncf_config, get_fake_input_func):
        input_size = nncf_config.get("input_info").get('sample_size')
        assert get_fake_input_func is not None
        assert len(input_size) == 4 and input_size[0] == 1
        H, W, C = input_size[2], input_size[3], input_size[1]
        device = next(model.parameters()).device
        return get_fake_input_func(cfg, orig_img_shape=tuple([H, W, C]), device=device)

    def dummy_forward(model):
        fake_data = _get_fake_data_for_forward(cfg, nncf_config, get_fake_input_func)
        img, img_metas = fake_data["img"], fake_data["img_metas"]
        img = nncf_model_input(img)
        if nncf_compress_postprocessing:
            ctx = model.forward_export_context(img_metas)
            logger.debug("NNCF will compress a postprocessing part of the model")
        else:
            ctx = model.forward_dummy_context(img_metas)
            logger.debug("NNCF will NOT compress a postprocessing part of the model")
        with ctx:
            model(img)

    model.dummy_forward_fn = dummy_forward

    compression_ctrl, model = create_compressed_model(model,
                                                      nncf_config,
                                                      dummy_forward_fn=dummy_forward,
                                                      resuming_state_dict=resuming_state_dict)
    model = change_export_func_first_conv(model)

    return compression_ctrl, model
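# A hedged usage sketch for wrap_nncf_model: in a typical mmdet-style training flow
# the wrapper is called once, after the model and config are built. The names
# `build_dataloader`, `train_data_loader`, and `get_fake_input` below are assumptions
# for illustration, not taken from this section.
#
#     train_data_loader = build_dataloader(dataset, samples_per_gpu=1, workers_per_gpu=1)
#     compression_ctrl, model = wrap_nncf_model(model, cfg,
#                                               data_loader_for_init=train_data_loader,
#                                               get_fake_input_func=get_fake_input)
#     ...
#     compression_ctrl.scheduler.epoch_step()  # once per training epoch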
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--epochs", type=int, default=50, help="number of epochs to train (default: 50)"
    )
    parser.add_argument(
        "--lr", type=float, default=0.05, help="learning rate (default: 0.05)"
    )
    parser.add_argument(
        "--enable_nncf_compression",
        action="store_true",
        default=False,
        help="nncf compression flag (default: False)",
    )
    parser.add_argument("--seed", type=int, default=1, help="random seed (default: 1)")
    parser.add_argument(
        "--ckpt_filename",
        type=str,
        default="resnet18_cifar10.pth",
        help="file name for the model checkpoint (default: resnet18_cifar10.pth)",
    )
    parser.add_argument(
        "--starting_checkpoint",
        type=str,
        default=None,
        help="checkpoint file name to start training from (default: None)",
    )
    args = parser.parse_args()
    print(args)

    torch.manual_seed(args.seed)

    input_size, num_classes, train_dataset, test_dataset = get_CIFAR10()

    kwargs = {"num_workers": 8, "pin_memory": True}
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=512, shuffle=True, **kwargs
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=5000, shuffle=False, **kwargs
    )

    model = Model()
    model = model.cuda()

    if args.starting_checkpoint is not None:
        model.load_state_dict(torch.load(args.starting_checkpoint))

    compression_ctrl = None
    if args.enable_nncf_compression:
        nncf_config_dict = {
            "compression": {
                "algorithm": "quantization",
                "initializer": {"range": {"num_init_steps": 5}},
            }
        }
        nncf_config = NNCFConfig(nncf_config_dict)
        nncf_config = register_default_init_args(nncf_config, None, train_loader)
        compression_ctrl, model = create_compressed_model(model, nncf_config)

    # Use a shorter LR schedule when fine-tuning the compressed model.
    if args.enable_nncf_compression:
        milestones = [5, 10]
    else:
        milestones = [25, 40]

    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4
    )
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.1
    )

    for epoch in range(1, args.epochs + 1):
        train(model, train_loader, optimizer, epoch, compression_ctrl)
        test(model, test_loader)
        scheduler.step()
        if compression_ctrl is not None:
            compression_ctrl.scheduler.epoch_step()

    torch.save(model.state_dict(), args.ckpt_filename)
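# The train/test helpers called in main() are not part of this section. Below is a
# minimal sketch of what `train` could look like, assuming the standard NNCF
# controller API (compression_ctrl.loss() for the compression loss term and
# compression_ctrl.scheduler.step() per iteration); the actual helper in the source
# repository may differ.
import torch.nn.functional as F

def train(model, train_loader, optimizer, epoch, compression_ctrl=None):
    model.train()
    for data, target in train_loader:
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        if compression_ctrl is not None:
            # Add the compression-specific loss term and advance the per-step scheduler.
            loss = loss + compression_ctrl.loss()
            compression_ctrl.scheduler.step()
        loss.backward()
        optimizer.step()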