def test_can_restore_binary_mask_on_magnitude_algo_resume():
    """Binary masks learned by magnitude sparsity must survive resuming into const sparsity."""
    config = get_empty_config()
    config['compression'] = {
        "algorithm": "magnitude_sparsity",
        "params": {
            "weight_importance": "abs",
            "schedule": "multistep",
            "multistep_sparsity_levels": [0.3, 0.5]
        }
    }
    sparse_model, _ = create_compressed_model_and_algo_for_test(MagnitudeTestModel(), config)

    # A single forward pass so that the magnitude masks are actually computed.
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    const_config = get_empty_config()
    const_config["compression"] = {"algorithm": "const_sparsity"}
    const_sparse_model, _ = create_compressed_model_and_algo_for_test(MagnitudeTestModel(), const_config)

    load_state(const_sparse_model, sparse_model.state_dict())

    # Each conv's pre-op must carry over the reference binary mask.
    for conv, ref_mask in ((const_sparse_model.conv1, ref_mask_1),
                           (const_sparse_model.conv2, ref_mask_2)):
        PTTensorListComparator.check_equal(ref_mask, conv.pre_ops['0'].operand.binary_mask)
def test_scale_and_sign_init_for_quant_algo__after_load_state(self, wrap_dataloader):
    """After loading a partial state dict, loaded quantizer params must be applied
    while untouched quantizers keep their range-init values."""
    config = create_config()
    data_loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    _, compressed_model = self.create_algo_and_compressed_model(config)
    ref_loaded_scale_val = torch.ones((1, 1, 1, 1)) * 100
    load_state(
        compressed_model, {
            'module.features.0.0.pre_ops.0.op.signed_tensor':
                torch.tensor([0.]),  # quantizer of 1st conv's weights
            'module.features.1.0.pre_ops.0.op.scale':
                ref_loaded_scale_val  # quantizer of 2nd conv's weights
        })
    self.check_sign_and_scale(
        compressed_model, {
            '.*Sequential\\[0\\].*UpdateWeight.*': (False, torch.ones(2, 1, 1, 1)),
            # Fixed regex: was 'UpdateWeight. *' (stray space), which required one
            # arbitrary character followed by literal spaces and could never match
            # the intended quantizer scope names.
            '.*Sequential\\[1\\].*UpdateWeight.*': (True, ref_loaded_scale_val),
            '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
            # NOTE(review): 'nncf_model_input*' lacks a dot before '*' — it matches
            # 'nncf_model_inpu' plus zero-or-more 't'; presumably intended as
            # '.*nncf_model_input.*' — confirm against the scope-name format.
            '.*activation_quantizers.*nncf_model_input*': (False, 1)
        })
def load_best_checkpoint(self, model):
    """Restore *model* weights from the best checkpoint recorded during training."""
    best_path = self._best_checkpoint
    nncf_logger.info('Loading the best checkpoint found during training '
                     '{}...'.format(best_path))
    checkpoint = torch.load(best_path, map_location='cpu')
    # A checkpoint may be either a raw state dict or wrap one under 'state_dict'.
    state_dict = checkpoint.get('state_dict', checkpoint)
    load_state(model, state_dict, is_resume=True)
def test_hawq_on_single_conv_without_quantizers(_seed, dataset_dir, tmp_path, params: HAWQTestParams, mocker):
    """Check the Hessian-trace estimator against a reference trace value computed
    on an uncompressed SqueezeNet with only the first conv's weights trainable.

    Random sampling inside ParameterHandler is mocked with numpy's random_sample
    so the estimate is reproducible across runs/devices.
    """
    config = get_squeezenet_quantization_config(batch_size=params.batch_size)
    iter_number = params.iter_number
    tolerance = 4e-4
    model = squeezenet1_1(num_classes=10, dropout=0)
    from torchvision.models.squeezenet import model_urls
    # Load ImageNet-pretrained weights (is_resume defaults are implied by load_state).
    load_state(model, model_zoo.load_url(model_urls['squeezenet1_1']))
    criterion = nn.CrossEntropyLoss()
    # Reference trace and tolerance differ between CPU and CUDA numerics.
    ref_trace = params.cpu_ref_trace
    rtol = 1e-5
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        ref_trace = params.cuda_ref_trace
        rtol = 1e-6
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    data_loader, _ = create_test_dataloaders(config, dataset_dir)
    device = next(model.parameters()).device
    # Freeze everything except the first conv so the trace covers one parameter only.
    for _, param in model.named_parameters():
        param.requires_grad = False
    first_conv = next(iter(get_all_modules_by_type(model, 'Conv2d').values()))
    first_conv.weight.requires_grad = True
    ph_import = 'nncf.torch.quantization.hessian_trace.ParameterHandler'
    sample_rademacher_patch = mocker.patch(f'{ph_import}.sample_rademacher_like_params', autospec=True)
    sample_normal_patch = mocker.patch(f'{ph_import}.sample_normal_like_params', autospec=True)

    def mock_sampling_fn(self):
        # Deterministic stand-in for both sampling strategies: draws from numpy's
        # seeded RNG and moves the tensors onto the handler's device.
        # pylint:disable=protected-access
        return list(map(lambda x: torch.from_numpy(random_sample(x.shape)).to(device=self._device), self.parameters))

    sample_rademacher_patch.side_effect = mock_sampling_fn
    sample_normal_patch.side_effect = mock_sampling_fn
    trace_estimator = HessianTraceEstimator(model, default_criterion_fn, criterion, device, data_loader, params.num_data_points)
    actual_state = trace_estimator.get_average_traces(max_iter=iter_number, tolerance=tolerance)
    assert math.isclose(actual_state.item(), ref_trace, rel_tol=rtol)
def test_is_overflow_fix_applied_model_resumed_correctly(tmp_path):
    """Overflow-fix quantizer state must be intact after resuming from a saved compression state."""
    nncf_config = get_config_for_export_mode(False)
    first_model, first_ctrl = create_compressed_model_and_algo_for_test(
        TwoConvTestModel(), nncf_config)
    saved_compression_state = first_ctrl.get_compression_state()
    saved_model_state = first_model.state_dict()
    # A fresh model instance is required: the previous one was modified in place
    # during create_compressed_model_and_algo_for_test().
    model = TwoConvTestModel()
    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, nncf_config, compression_state=saved_compression_state)
    load_state(compressed_model, saved_model_state, is_resume=True)
    are_symmetric_fq_nodes_are_exported_correct_with_overflow_fix(tmp_path, compression_ctrl)
def build_ssd_mobilenet(cfg, size, num_classes, config):
    """Build a MobileNet-SSD detector, optionally initializing the backbone from
    `config.basenet` (skipped when resuming or when full weights are supplied).

    Raises:
        ValueError: if `size` is anything other than 300.
    """
    if size != 300:
        raise ValueError("Only Mobilenet-SSD with input size 300 is supported")

    mobilenet_ssd = MobileNetSSD(num_classes, cfg)

    if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
        print('Loading base network...')
        #
        # ** WARNING: torch.load uses Python's pickling facilities, which may be used
        # to perform arbitrary code execution during unpickling. Only load the data
        # you trust.
        #
        basenet_weights = torch.load(config.basenet)['state_dict']
        # Strip the 'model.' prefix so keys match the bare backbone module.
        new_weights = {wn.replace('model.', ''): wv for wn, wv in basenet_weights.items()}
        load_state(mobilenet_ssd.basenet, new_weights, is_resume=False)
    return mobilenet_ssd
def load_torch_model(config, cuda=False):
    """Build a compressed torch model from *config*; optionally load weights and move to CUDA."""
    weights = config.get('weights')
    # Only fall back to a pretrained model when no explicit weights are given.
    use_pretrained = config.get('pretrained', True) if weights is None else False
    model = load_model(config.get('model'),
                       pretrained=use_pretrained,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        # NOTE(review): torch.load is pickle-based; the weights file must be trusted.
        checkpoint = torch.load(weights, map_location='cpu')
        load_state(model, checkpoint)
    if cuda:
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    print(compression_ctrl.statistics().to_str())
    return model
def load_checkpoint(model, filename, map_location=None, strict=False):
    """Load checkpoint from a file or URI.

    Args:
        model (Module): Module to load checkpoint.
        filename (str): Either a filepath or URL or modelzoo://xxxxxxx.
        map_location (str): Same as :func:`torch.load`.
        strict (bool): Whether to allow different params for the model and
            checkpoint.

    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """
    from nncf.torch.checkpoint_loading import load_state

    ckpt = torch.load(filename, map_location=map_location)
    # A checkpoint is either a bare state dict (an OrderedDict) or a plain dict
    # that wraps one under the 'state_dict' key.
    if isinstance(ckpt, OrderedDict):
        sd = ckpt
    elif isinstance(ckpt, dict) and 'state_dict' in ckpt:
        sd = ckpt['state_dict']
    else:
        raise RuntimeError('No state_dict found in checkpoint file {}'.format(filename))
    _ = load_state(model, sd, strict)
    return ckpt
def test_can_restore_binary_mask_on_magnitude_quant_algo_resume(tmp_path, use_data_parallel):
    """Masks from magnitude sparsity + quantization must survive resuming into
    const sparsity + quantization (optionally under DataParallel)."""
    config = get_empty_config()
    config["compression"] = [{
        "algorithm": "magnitude_sparsity",
        "params": {
            "schedule": "multistep",
            "multistep_sparsity_levels": [0.3, 0.5],
            "weight_importance": "abs"
        }
    }, {
        "algorithm": "quantization"
    }]
    register_bn_adaptation_init_args(config)
    sparse_model, _ = create_compressed_model_and_algo_for_test(MagnitudeTestModel(), config)

    if use_data_parallel:
        if not torch.cuda.is_available():
            pytest.skip("Skipping CUDA test cases for CPU only setups")
        sparse_model.cuda()
        sparse_model = torch.nn.DataParallel(sparse_model)

    # One forward pass so the magnitude masks are materialized.
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    resume_config = get_empty_config()
    resume_config["compression"] = [{"algorithm": "const_sparsity"}, {"algorithm": "quantization"}]
    register_bn_adaptation_init_args(resume_config)
    const_sparse_model, _ = create_compressed_model_and_algo_for_test(MagnitudeTestModel(), resume_config)

    load_state(const_sparse_model, sparse_model.state_dict())

    wrapped = const_sparse_model.get_nncf_wrapped_model()
    for conv, ref_mask in ((wrapped.conv1, ref_mask_1), (wrapped.conv2, ref_mask_2)):
        PTTensorListComparator.check_equal(ref_mask, conv.pre_ops['0'].operand.binary_mask)
def test_renamed_activation_quantizer_storage_in_state_dict():
    """Loading an old-style state dict (pre-rename activation quantizer keys)
    must succeed while emitting a deprecation warning."""
    config = get_basic_quantization_config(input_info={"sample_size": [1, 3, 100, 100]})
    register_bn_adaptation_init_args(config)
    model = SingleConv2dIdentityModel()
    compressed_model, _ = create_compressed_model_and_algo_for_test(model, config)
    with pytest.deprecated_call():
        _ = load_state(compressed_model, old_style_sd, is_resume=True)
def load_best_checkpoint(self, model):
    """Load into *model* the checkpoint with the highest compression rate among
    those that satisfied the accuracy budget during training.

    Does nothing (beyond a warning) when no such checkpoint exists.
    """
    # load checkpoint with highest compression rate and positive acc budget
    possible_checkpoint_rates = self.get_compression_rates_with_positive_acc_budget()
    if not possible_checkpoint_rates:
        nncf_logger.warning(
            'Could not produce a compressed model satisfying the set accuracy '
            'degradation criterion during training. Increasing the number of training '
            'epochs')
        # Bug fix: without this early return, sorted([])[-1] below raised an
        # IndexError whenever no checkpoint satisfied the accuracy budget.
        return
    best_checkpoint_compression_rate = sorted(possible_checkpoint_rates)[-1]
    resuming_checkpoint_path = self._best_checkpoints[best_checkpoint_compression_rate]
    nncf_logger.info('Loading the best checkpoint found during training '
                     '{}...'.format(resuming_checkpoint_path))
    resuming_checkpoint = torch.load(resuming_checkpoint_path, map_location='cpu')
    # The checkpoint may be a raw state dict or wrap one under 'state_dict'.
    resuming_model_state_dict = resuming_checkpoint.get('state_dict', resuming_checkpoint)
    load_state(model, resuming_model_state_dict, is_resume=True)
def build_ssd_mobilenet(cfg, size, num_classes, config):
    """Build a MobileNet-SSD detector, optionally initializing the backbone from
    `config.basenet` (skipped when resuming or when full weights are supplied).

    Raises:
        ValueError: if `size` is anything other than 300.
    """
    if size != 300:
        raise ValueError("Only Mobilenet-SSD with input size 300 is supported")

    mobilenet_ssd = MobileNetSSD(num_classes, cfg)

    should_load_basenet = (config.basenet
                           and config.resuming_checkpoint_path is None
                           and config.weights is None)
    if should_load_basenet:
        logger.debug('Loading base network...')
        #
        # ** WARNING: torch.load functionality uses Python's pickling facilities that
        # may be used to perform arbitrary code execution during unpickling. Only load
        # the data you trust.
        #
        basenet_weights = torch.load(config.basenet,
                                     pickle_module=restricted_pickle_module)['state_dict']
        # Strip the 'model.' prefix so keys match the bare backbone module.
        new_weights = {key.replace('model.', ''): value for key, value in basenet_weights.items()}
        load_state(mobilenet_ssd.basenet, new_weights, is_resume=False)
    return mobilenet_ssd
def test_model_can_be_loaded_with_resume(_params):
    """A previously saved checkpoint must load (is_resume=True) into a freshly
    built compressed model for the same sample config."""
    p = _params
    sample_config_path = p['sample_config_path']
    checkpoint_path = p['checkpoint_path']

    config = SampleConfig.from_json(str(sample_config_path))
    nncf_config = NNCFConfig.from_json(str(sample_config_path))

    config.execution_mode = p['execution_mode']
    config.current_gpu = 0
    config.device = get_device(config)
    distributed_modes = (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    config.distributed = config.execution_mode in distributed_modes
    if config.distributed:
        config.dist_url = "tcp://127.0.0.1:9898"
        config.dist_backend = "nccl"
        config.rank = 0
        config.world_size = 1
        configure_distributed(config)

    model_name = config['model']
    model = load_model(model_name,
                       pretrained=False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    nncf_config = register_default_init_args(
        nncf_config, train_loader=create_ones_mock_dataloader(nncf_config))
    model.to(config.device)

    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, nncf_config)
    model, _ = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
def test_load_state_sets_initialized_flag():
    """load_state must mark quantizers whose params were loaded as initialized,
    and leave untouched quantizers uninitialized."""
    config = get_quantization_config_without_range_init()
    register_bn_adaptation_init_args(config)
    quant_model, qctrl = create_compressed_model_and_algo_for_test(TwoConvTestModel(), config)

    partial_sd = {
        # quantizer of 1st conv's weights
        'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([1.0]),
        # quantizer of 2nd conv's weights
        'module.features.1.0.pre_ops.0.op.scale': torch.ones(1, 1, 1, 1),
    }
    load_state(quant_model, partial_sd)

    # Weight quantizers received params from the state dict -> initialized.
    for wq_info in qctrl.weight_quantizers.values():
        assert wq_info.quantizer_module_ref.initialized
    # Activation quantizers were not in the state dict -> still uninitialized.
    for aq_info in qctrl.non_weight_quantizers.values():
        assert not aq_info.quantizer_module_ref.initialized
def build_ssd_vgg(cfg, size, num_classes, config):
    """Build an SSD detector with a VGG backbone, optionally initializing the
    backbone from `config.basenet` (skipped when resuming or when full weights
    are supplied)."""
    ssd_vgg = SSD_VGG(cfg, size, num_classes, batch_norm=config.get('batchnorm', False))

    load_basenet = (config.basenet
                    and config.resuming_checkpoint_path is None
                    and config.weights is None)
    if load_basenet:
        print('Loading base network...')
        #
        # ** WARNING: torch.load functionality uses Python's pickling facilities that
        # may be used to perform arbitrary code execution during unpickling. Only load
        # the data you trust.
        #
        basenet_weights = torch.load(config.basenet)
        # Strip the 'features.' prefix so keys match the bare backbone module.
        new_weights = {key.replace('features.', ''): value for key, value in basenet_weights.items()}
        load_state(ssd_vgg.basenet, new_weights, is_resume=False)
    return ssd_vgg
def test_quantization_ckpt_without_wrapped_bn_loading():
    """Compression states / state dicts produced before BN wrapping must still
    load, each emitting a deprecation warning."""
    model = ConvBNLayer()
    config = get_basic_quantization_config(input_info={"sample_size": [1, 3, 100, 100]})
    register_bn_adaptation_init_args(config)

    with pytest.deprecated_call():
        compressed_model, _ = create_compressed_model_and_algo_for_test(
            model, config, compression_state=compression_state_without_bn_wrapping)

    with pytest.deprecated_call():
        _ = load_state(compressed_model, sd_without_nncf_bn_wrapping, is_resume=True)
def load_model(model, pretrained=True, num_classes=1000, model_params=None,
               weights_path: str = None) -> torch.nn.Module:
    """Resolve *model* by name (torchvision, custom_models, or the special
    mobilenet_v2_32x32) and optionally load weights from *weights_path*.

    ** WARNING: This is implemented using torch.load functionality,
    which itself uses Python's pickling facilities that may be used to perform
    arbitrary code execution during unpickling. Only load the data you trust.
    """
    logger.info("Loading model: {}".format(model))
    if model_params is None:
        model_params = {}

    # Resolve the constructor: torchvision first, then project-local custom models.
    if model in torchvision.models.__dict__:
        ctor = torchvision.models.__dict__[model]
        load_model_fn = partial(ctor, num_classes=num_classes, pretrained=pretrained, **model_params)
    elif model in custom_models.__dict__:
        ctor = custom_models.__dict__[model]
        load_model_fn = partial(ctor, num_classes=num_classes, pretrained=pretrained, **model_params)
    elif model == "mobilenet_v2_32x32":
        load_model_fn = partial(MobileNetV2For32x32, num_classes=100)
    else:
        raise Exception("Undefined model name")

    loaded_model = safe_thread_call(load_model_fn)

    # Explicit weights override only apply when a pretrained model was not requested.
    if not pretrained and weights_path is not None:
        sd = torch.load(weights_path, map_location='cpu', pickle_module=restricted_pickle_module)
        if MODEL_STATE_ATTR in sd:
            sd = sd[MODEL_STATE_ATTR]
        load_state(loaded_model, sd, is_resume=False)
    return loaded_model
def staged_quantization_main_worker(current_gpu, config):
    """Single-process worker for the staged quantization/binarization classification sample.

    Builds the model, wraps it with NNCF compression, optionally resumes from a
    checkpoint, then runs train / test / ONNX export depending on ``config.mode``.
    """
    configure_device(current_gpu, config)
    config.mlflow = SafeMLFLow(config)
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    set_seed(config)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    model_name = config['model']
    # Inception models require an auxiliary-logits-aware loss wrapper.
    is_inception = 'inception' in model_name
    train_criterion_fn = inception_criterion_fn if is_inception else default_criterion_fn

    train_loader = train_sampler = val_loader = None
    resuming_checkpoint_path = config.resuming_checkpoint_path
    nncf_config = config.nncf_config

    pretrained = is_pretrained_model_requested(config)
    # Export-only runs skip dataset creation entirely.
    is_export_only = 'export' in config.mode and ('train' not in config.mode and 'test' not in config.mode)

    if is_export_only:
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        # Data loading code
        train_dataset, val_dataset = create_datasets(config)
        train_loader, train_sampler, val_loader, init_loader = create_data_loaders(
            config, train_dataset, val_dataset)

        def autoq_eval_fn(model, eval_loader):
            # AutoQ ranks candidate quantization policies by top-5 accuracy.
            _, top5, _ = validate(eval_loader, model, criterion, config)
            return top5

        nncf_config = register_default_init_args(
            nncf_config, init_loader, criterion=criterion, criterion_fn=train_criterion_fn,
            autoq_eval_fn=autoq_eval_fn, val_loader=val_loader, device=config.device)

    # create model
    model_name = config['model']
    model = load_model(model_name,
                       pretrained=pretrained,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'),
                       weights_path=config.get('weights'))

    # Keep an uncompressed copy to serve as the knowledge-distillation teacher.
    original_model = copy.deepcopy(model)

    model.to(config.device)

    resuming_checkpoint = None
    if resuming_checkpoint_path is not None:
        resuming_checkpoint = load_resuming_checkpoint(resuming_checkpoint_path)
    model_state_dict, compression_state = extract_model_and_compression_states(resuming_checkpoint)
    # The compression state must be passed to create_compressed_model BEFORE
    # the model weights are loaded, so the controller is rebuilt consistently.
    compression_ctrl, model = create_compressed_model(model, nncf_config, compression_state)
    if model_state_dict is not None:
        load_state(model, model_state_dict, is_resume=True)

    if not isinstance(compression_ctrl, (BinarizationController, QuantizationController)):
        raise RuntimeError(
            "The stage quantization sample worker may only be run with the binarization and quantization algorithms!"
        )

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_ctrl.distributed()

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    # 'compression' may be a single algo dict or a list; quantization comes first.
    quantization_config = compression_config if isinstance(
        compression_config, dict) else compression_config[0]
    optimizer = get_quantization_optimizer(params_to_optimize, quantization_config)
    optimizer_scheduler = PolyLRDropScheduler(optimizer, quantization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None and config.to_onnx is None:
        config.start_epoch = resuming_checkpoint['epoch']
        best_acc1 = resuming_checkpoint['best_acc1']
        kd_loss_calculator.original_model.load_state_dict(
            resuming_checkpoint['original_model_state_dict'])
        if 'train' in config.mode:
            # Optimizer/scheduler state is only meaningful when training continues.
            optimizer.load_state_dict(resuming_checkpoint['optimizer'])
            optimizer_scheduler.load_state_dict(
                resuming_checkpoint['optimizer_scheduler'])
            logger.info(
                "=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})".
                format(resuming_checkpoint_path, resuming_checkpoint['epoch'], best_acc1))
        else:
            logger.info(
                "=> loaded checkpoint '{}'".format(resuming_checkpoint_path))

    log_common_mlflow_params(config)

    if is_export_only:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    if is_main_process():
        statistics = compression_ctrl.statistics()
        logger.info(statistics.to_str())

    if 'train' in config.mode:
        batch_multiplier = (quantization_config.get("params", {})).get(
            "batch_multiplier", 1)
        train_staged(config, compression_ctrl, model, criterion,
                     train_criterion_fn, optimizer_scheduler, model_name,
                     optimizer, train_loader, train_sampler, val_loader,
                     kd_loss_calculator, batch_multiplier, best_acc1)

    if 'test' in config.mode:
        validate(val_loader, model, criterion, config)

    if 'export' in config.mode:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))