def test_load_state_interoperability(_algos, _model_wrapper, is_resume):
    config_save = get_empty_config()
    config_save['compression'] = [{'algorithm': algo} for algo in _algos['save_algos']]
    compressed_model_save, _ = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config_save)
    model_save = _model_wrapper['save_model'](compressed_model_save)
    saved_model_state = model_save.state_dict()
    ref_num_loaded = len(saved_model_state)

    config_resume = get_empty_config()
    config_resume['compression'] = [{'algorithm': algo} for algo in _algos['load_algos']]
    compressed_model_resume, _ = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config_resume)
    model_resume = _model_wrapper['resume_model'](compressed_model_resume)

    if not is_resume or (is_resume and _algos['is_resume_ok']):
        act_num_loaded = load_state(model_resume, saved_model_state, is_resume)

        if ('magnitude_sparsity' in _algos['load_algos'] or 'const_sparsity' in _algos['load_algos']) \
                and 'rb_sparsity' in _algos['save_algos']:
            # no need to load _mask and _uniform
            ref_num_loaded -= 2
        assert act_num_loaded == ref_num_loaded
    else:
        with pytest.raises(RuntimeError):
            load_state(model_resume, saved_model_state, is_resume)
def test_scale_and_sign_init_for_quant_algo__after_load_state(self, wrap_dataloader):
    config = create_config()
    data_loader = self.create_dataloader(wrap_dataloader, config)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    _, compressed_model = self.create_algo_and_compressed_model(config)

    ref_loaded_scale_val = torch.ones((1, 1, 1, 1)) * 100
    load_state(compressed_model, {
        'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([0.]),  # quantizer of 1st conv's weights
        'module.features.1.0.pre_ops.0.op.scale': ref_loaded_scale_val  # quantizer of 2nd conv's weights
    })

    self.check_sign_and_scale(compressed_model, {
        '.*Sequential\\[0\\].*UpdateWeight.*': (False, torch.ones(2, 1, 1, 1)),
        '.*Sequential\\[1\\].*UpdateWeight.*': (True, ref_loaded_scale_val),
        '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
        '.*activation_quantizers.*nncf_model_input*': (False, 1)
    })
def test_can_restore_binary_mask_on_magnitude_algo_resume():
    config = get_empty_config()
    config['compression'] = {
        "algorithm": "magnitude_sparsity",
        "params": {
            "weight_importance": "abs",
            "schedule": "multistep",
            "multistep_sparsity_levels": [0.3, 0.5]
        }
    }
    sparse_model, _ = create_compressed_model_and_algo_for_test(MagnitudeTestModel(), config)
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    config = get_empty_config()
    config["compression"] = {"algorithm": "const_sparsity"}
    const_sparse_model, _ = create_compressed_model_and_algo_for_test(MagnitudeTestModel(), config)

    load_state(const_sparse_model, sparse_model.state_dict())

    op = const_sparse_model.conv1.pre_ops['0']
    check_equal(ref_mask_1, op.operand.binary_mask)

    op = const_sparse_model.conv2.pre_ops['0']
    check_equal(ref_mask_2, op.operand.binary_mask)
def test_hawq_on_single_conv_without_quantizers(_seed, dataset_dir, tmp_path, params: HAWQTestParams):
    config = get_squeezenet_quantization_config(batch_size=params.batch_size)
    iter_number = params.iter_number
    tolerance = 4e-4

    model = squeezenet1_1(num_classes=10, dropout=0)
    from torchvision.models.squeezenet import model_urls
    load_state(model, model_zoo.load_url(model_urls['squeezenet1_1']))
    model = model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    if not dataset_dir:
        dataset_dir = str(tmp_path)
    data_loader, _ = create_test_dataloaders(config, dataset_dir)
    device = next(model.parameters()).device

    for _, param in model.named_parameters():
        param.requires_grad = False
    first_conv = next(iter(get_all_modules_by_type(model, 'Conv2d').values()))
    first_conv.weight.requires_grad = True

    trace_estimator = HessianTraceEstimator(model, default_criterion_fn, criterion, device, data_loader,
                                            params.num_data_points)
    actual_state = trace_estimator.get_average_traces(max_iter=iter_number, tolerance=tolerance)
    assert math.isclose(actual_state.item(), params.ref_trace, rel_tol=1e-09)
def test_model_can_be_loaded_with_resume(_params):
    p = _params
    sample_config_path = p['sample_config_path']
    checkpoint_path = p['checkpoint_path']

    config = SampleConfig.from_json(str(sample_config_path))
    nncf_config = NNCFConfig.from_json(str(sample_config_path))

    config.execution_mode = p['execution_mode']

    config.current_gpu = 0
    config.device = get_device(config)
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        config.dist_url = "tcp://127.0.0.1:9898"
        config.dist_backend = "nccl"
        config.rank = 0
        config.world_size = 1
        configure_distributed(config)

    model_name = config['model']
    model = load_model(model_name,
                       pretrained=False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))

    model.to(config.device)
    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, nncf_config)
    model, _ = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker, config_creator: Callable, filename_suffix: str):
    num_data_points = 100
    batch_size = 10
    config = config_creator(batch_size, num_data_points)
    model = MobileNetV2(num_classes=10)
    model.eval()

    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config.get("model_size"), dataset_dir, batch_size)
    config = register_default_init_args(config, criterion, train_loader)

    mocked_trace = mocker.patch('nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces')
    mock_avg_traces = get_mock_avg_traces(model)
    mocked_trace.return_value = mock_avg_traces

    from torchvision.models.mobilenet import model_urls
    load_state(model, model_zoo.load_url(model_urls['mobilenet_v2']))
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)
    model = model.cuda()

    all_quantizers_per_full_scope = get_all_quantizers_per_full_scope(model)
    graph = get_bitwidth_graph(algo_ctrl, model, all_quantizers_per_full_scope)

    path_to_dot = 'mobilenet_v2_mixed_bitwidth_graph_{}.dot'.format(filename_suffix)
    check_graph(graph, path_to_dot, os.path.join('quantized', 'hawq'), sort_dot_graph=False)
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker):
    num_data_points = 100
    batch_size = 10
    config = create_hawq_test_config(batch_size, num_data_points)
    model = squeezenet1_1_custom(num_classes=10, pretrained=False, dropout=0)

    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    load_state(model, model_zoo.load_url(model_urls['squeezenet1_1']))
    model = model.cuda()
    device = next(model.parameters()).device

    criterion = nn.CrossEntropyLoss().cuda()

    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config.model_size, dataset_dir, batch_size)

    mocked_trace = mocker.patch('nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces')
    num_traces = len(get_all_modules_by_type(model, 'NNCFConv2d'))
    mock_avg_traces = [torch.Tensor([num_traces - i]).to(device) for i in range(num_traces)]
    mocked_trace.return_value = mock_avg_traces

    compression_ctrl.initialize(criterion=criterion, data_loader=train_loader)

    act_bitwidth_per_scope = get_bitwidth_per_scope(model)
    path_to_ref = str(TEST_ROOT / 'data/hawq_reference/squeezenet1_1_mixed_bitwidth_per_scope.json')
    compare_with_ref_if_exists(act_bitwidth_per_scope, path_to_ref)
def build_ssd_vgg(cfg, size, num_classes, config):
    ssd_vgg = SSD_VGG(cfg, size, num_classes, batch_norm=config.get('batchnorm', False))

    if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
        logger.debug('Loading base network...')
        basenet_weights = torch.load(config.basenet)
        new_weights = {}
        for wn, wv in basenet_weights.items():
            wn = wn.replace('features.', '')
            new_weights[wn] = wv

        load_state(ssd_vgg.basenet, new_weights, is_resume=False)
    return ssd_vgg
def build_ssd_mobilenet(cfg, size, num_classes, config): if size != 300: raise ValueError("Only Mobilenet-SSD with input size 300 is supported") mobilenet_ssd = MobileNetSSD(num_classes, cfg) if config.basenet and (config.resuming_checkpoint is None) and (config.weights is None): logger.debug('Loading base network...') basenet_weights = torch.load(config.basenet)['state_dict'] new_weights = {} for wn, wv in basenet_weights.items(): wn = wn.replace('model.', '') new_weights[wn] = wv load_state(mobilenet_ssd.basenet, new_weights, is_resume=False) return mobilenet_ssd
def load_torch_model(config, cuda=False):
    weights = config.get('weights')
    model = load_model(config.get('model'),
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)
    if cuda:
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    print_statistics(compression_ctrl.statistics())
    return model
def test_load_state_sets_initialized_flag():
    config = get_basic_quantization_config()

    model = TwoConvTestModel()
    quant_model, _ = create_compressed_model_and_algo_for_test(model, config)

    load_state(quant_model, {
        'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([1.0]),  # quantizer of 1st conv's weights
        'module.features.1.0.pre_ops.0.op.scale': torch.tensor([1.0])  # quantizer of 2nd conv's weights
    })

    quantizers = get_all_modules_by_type(quant_model, 'SymmetricQuantizer')
    for scope, module in quantizers.items():
        if 'activation_quantizers' in str(scope) or 'UpdateInputs' in str(scope):
            assert not module.initialized
        else:
            assert module.initialized
def test_scale_and_sign_init_for_quant_algo__after_load_state(self, wrap_dataloader):
    config = self.create_config()
    algo, compressed_model = self.create_algo_and_compressed_model(config)
    load_state(compressed_model, {
        'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([0.]),  # quantizer of 1st conv's weights
        'module.features.1.0.pre_ops.0.op.scale': torch.tensor([100])  # quantizer of 2nd conv's weights
    })

    device = next(compressed_model.parameters()).device
    data_loader = self.create_dataloader(wrap_dataloader, config, device)

    algo.initialize(data_loader)

    self.check_sign_and_scale(compressed_model, {
        '.*Sequential\\[0\\].*UpdateWeight.*': (False, 1),
        '.*Sequential\\[1\\].*UpdateWeight.*': (False, 100),
        '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
        '.*activation_quantizers.*Sequential\\[1\\].*': (True, 24)
    })
def build_ssd_mobilenet(cfg, size, num_classes, config): if size != 300: raise ValueError("Only Mobilenet-SSD with input size 300 is supported") mobilenet_ssd = MobileNetSSD(num_classes, cfg) if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None): logger.debug('Loading base network...') # # ** WARNING: torch.load functionality uses Python's pickling facilities that # may be used to perform arbitrary code execution during unpickling. Only load the data you # trust. # basenet_weights = torch.load(config.basenet)['state_dict'] new_weights = {} for wn, wv in basenet_weights.items(): wn = wn.replace('model.', '') new_weights[wn] = wv load_state(mobilenet_ssd.basenet, new_weights, is_resume=False) return mobilenet_ssd
def load_checkpoint(model, model_path, device_name, optimizer=None, compression_scheduler=None):
    """Loads the model from a specified directory with a specified name

    Keyword arguments:
    - model (``nn.Module``): The stored model state is copied to this model instance.
    - model_path: The model filename.
    - device_name: Device name for the model to be loaded into.
    - optimizer (``torch.optim``): The stored optimizer state is copied to this optimizer instance.
    - compression_scheduler: The compression scheduler for the saved state to be loaded into.

    Returns: The ``model``, ``optimizer``, epoch, mean IoU and ``compression_scheduler``, loaded from the
    checkpoint.
    """
    assert os.path.isfile(model_path), "The model file \"{0}\" doesn't exist.".format(model_path)

    # Load the stored model parameters to the model instance
    #
    # ** WARNING: torch.load functionality uses Python's pickling facilities that
    # may be used to perform arbitrary code execution during unpickling. Only load the data you
    # trust.
    #
    checkpoint = torch.load(model_path, map_location=device_name)
    load_state(model, checkpoint['state_dict'], is_resume=True)
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])
    epoch = checkpoint['epoch']
    miou = checkpoint['miou']

    if "scheduler" in checkpoint and compression_scheduler is not None:
        compression_scheduler.load_state_dict(checkpoint['scheduler'])

    return model, optimizer, epoch, miou, compression_scheduler
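# A hypothetical usage sketch of load_checkpoint() above, showing how the documented return tuple is
# typically consumed when resuming training. The checkpoint path, device string, and the
# `build_model`/`build_optimizer` helpers are illustrative placeholders, not part of the original sample code.
def resume_training_example():
    model = build_model()               # placeholder: construct the (compressed) model
    optimizer = build_optimizer(model)  # placeholder: construct the optimizer
    model, optimizer, start_epoch, best_miou, scheduler = load_checkpoint(
        model, 'checkpoints/model_best.pth', 'cuda:0', optimizer, compression_scheduler=None)
    return model, optimizer, start_epoch, best_miou, scheduler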
def resume_from_checkpoint(resuming_checkpoint, model, config, optimizer, optimizer_scheduler, kd_loss_calculator,
                           compression_ctrl):
    best_acc1 = 0
    if osp.isfile(resuming_checkpoint):
        logger.info("=> loading checkpoint '{}'".format(resuming_checkpoint))
        checkpoint = torch.load(resuming_checkpoint, map_location='cpu')
        load_state(model, checkpoint['state_dict'], is_resume=True)
        if config.mode.lower() == 'train' and config.to_onnx is None:
            config.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            kd_loss_calculator.original_model.load_state_dict(checkpoint['original_model_state_dict'])
            compression_ctrl.scheduler.load_state_dict(checkpoint['compression_scheduler'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            optimizer_scheduler.load_state_dict(checkpoint['optimizer_scheduler'])
            logger.info("=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})"
                        .format(resuming_checkpoint, checkpoint['epoch'], best_acc1))
        else:
            logger.info("=> loaded checkpoint '{}'".format(resuming_checkpoint))
    else:
        raise FileNotFoundError("no checkpoint found at '{}'".format(resuming_checkpoint))
    return model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_ctrl, best_acc1
def build_ssd_vgg(cfg, size, num_classes, config):
    ssd_vgg = SSD_VGG(cfg, size, num_classes, batch_norm=config.get('batchnorm', False))

    if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
        print('Loading base network...')
        #
        # ** WARNING: torch.load functionality uses Python's pickling facilities that
        # may be used to perform arbitrary code execution during unpickling. Only load the data you
        # trust.
        #
        basenet_weights = torch.load(config.basenet)
        new_weights = {}
        for wn, wv in basenet_weights.items():
            wn = wn.replace('features.', '')
            new_weights[wn] = wv
        load_state(ssd_vgg.basenet, new_weights, is_resume=False)
    return ssd_vgg
def test_load_state_skips_not_matched_params__from_smaller_to_larger():
    ref_weights = torch.tensor([[[[3, 2],
                                  [2, 3]]]])
    ref_bias = torch.tensor([2.])
    model_save = BasicConvTestModel(out_channels=2)
    model_load = BasicConvTestModel(out_channels=1, weight_init=2, bias_init=2)

    num_loaded = load_state(model_load, model_save.state_dict())

    assert num_loaded == 0
    act_bias = model_load.conv.bias.data
    act_weights = model_load.conv.weight.data
    check_equal(act_bias, ref_bias)
    check_equal(act_weights, ref_weights)
def test_load_state_skips_not_matched_params__from_larger_to_smaller():
    ref_weights = BasicConvTestModel.default_weight()
    ref_bias = BasicConvTestModel.default_bias()
    model_save = BasicConvTestModel(out_channels=1, weight_init=2, bias_init=2)
    model_load = BasicConvTestModel(out_channels=2)

    num_loaded = load_state(model_load, model_save.state_dict())

    act_bias = model_load.conv.bias.data
    act_weights = model_load.conv.weight.data

    assert num_loaded == 0
    check_equal(act_bias, ref_bias)
    check_equal(act_weights, ref_weights)
def test_can_restore_binary_mask_on_magnitude_quant_algo_resume(tmp_path):
    config = get_empty_config()
    config["compression"] = [
        {
            "algorithm": "magnitude_sparsity",
            "params": {
                "schedule": "multistep",
                "multistep_sparsity_levels": [0.3, 0.5],
                "weight_importance": "abs"
            }
        },
        {
            "algorithm": "quantization"
        }
    ]
    sparse_model, _ = create_compressed_model_and_algo_for_test(MagnitudeTestModel(), config)

    # load_state doesn't support CPU + Quantization
    sparse_model = torch.nn.DataParallel(sparse_model)
    sparse_model.cuda()
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    config = get_empty_config()
    config["compression"] = [{"algorithm": "const_sparsity"}, {"algorithm": "quantization"}]
    const_sparse_model, _ = create_compressed_model_and_algo_for_test(MagnitudeTestModel(), config)

    load_state(const_sparse_model, sparse_model.state_dict())

    op = const_sparse_model.get_nncf_wrapped_model().conv1.pre_ops['0']
    check_equal(ref_mask_1, op.operand.binary_mask)

    op = const_sparse_model.get_nncf_wrapped_model().conv2.pre_ops['0']
    check_equal(ref_mask_2, op.operand.binary_mask)
def test_ordinary_load(algo, _model_wrapper, is_resume):
    config = get_empty_config()
    if algo:
        config['compression'] = {'algorithm': algo}

    compressed_model_save, _ = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config)
    model_save = _model_wrapper['save_model'](compressed_model_save)

    compressed_model_resume, _ = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config)
    model_resume = _model_wrapper['resume_model'](compressed_model_resume)

    num_loaded = load_state(model_resume, model_save.state_dict(), is_resume)

    assert num_loaded == len(model_save.state_dict())
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            dump_graphs=True) \
        -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.

    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
        source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
        to the model.
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
        building.
    :param dummy_forward_fn: if supplied, will be used instead of a *forward* function call to build
        the internal graph representation via tracing. Specifying this is useful when the original training pipeline
        has special formats of data loader output or has additional *forward* arguments other than input tensors.
        Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
        to the shape specified in the config object.
    :param wrap_inputs_fn: if supplied, will be used on the module's input arguments during a regular, non-dummy
        forward call before passing the inputs to the underlying compressed model. This is required if the model's
        input tensors that are important for compression are not supplied as arguments to the model's forward call
        directly, but instead are located in a container (such as a list), and the model receives the container as an
        argument. wrap_inputs_fn should take as input two arguments - the tuple of positional arguments to the
        underlying model's forward call, and a dict of keyword arguments to the same. The function should wrap each
        tensor among the supplied model's args and kwargs that is important for compression (e.g. quantization) with
        an nncf.nncf_model_input function, which is a no-operation function that marks the tensors as inputs to be
        traced by NNCF in the internal graph representation. Output is the tuple of (args, kwargs), where args and
        kwargs are the same as were supplied in input, but with each compression-relevant tensor wrapped as described
        above.
    :param dump_graphs: Whether to also dump the internal graph representation of the original and compressed
        models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
        is an instance of CompositeCompressionController) and the model ready for compression parameter training,
        wrapped as an object of NNCFNetwork."""

    # Compress the model that will be deployed for inference on the target device. There is no need to compress
    # parts of the model that are used on the training stage only (e.g. AuxLogits of the Inception-v3 model) or
    # unused modules with weights. As a consequence, there is no need to worry about spoiling BN statistics,
    # as they are disabled in eval mode.
    model.eval()

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(custom_forward_fn=create_dummy_forward_fn(input_info_list,
                                                                                   with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.visualize_graph(osp.join(config.get("log_dir", "."), "original_graph.dot"))

    set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching', [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(model, input_infos=input_info_list,
                                   dummy_forward_fn=dummy_forward_fn,
                                   wrap_inputs_fn=wrap_inputs_fn,
                                   ignored_scopes=ignored_scopes,
                                   target_scopes=target_scopes,
                                   scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    try:
        if resuming_state_dict is not None:
            load_state(compressed_model, resuming_state_dict, is_resume=True)
    finally:
        if dump_graphs and is_main_process() and compression_algo_builder_list:
            if dummy_forward_fn is None:
                compressed_graph_builder = GraphBuilder(custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=False))
            else:
                compressed_graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

            graph = compressed_graph_builder.build_graph(compressed_model, compressed_model.get_tracing_context())
            graph.visualize_graph(osp.join(config.get("log_dir", "."), "compressed_graph.dot"))

    return compression_ctrl, compressed_model
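# A minimal sketch of a custom `wrap_inputs_fn` as described in the docstring above, assuming a model whose
# forward() receives a dict of tensors as its single positional argument; the 'image' key is an illustrative
# assumption, not part of the NNCF API beyond `nncf_model_input` itself.
from nncf import nncf_model_input

def wrap_inputs_fn_example(args, kwargs):
    # args[0] is assumed to hold the batch dict; wrap only the tensor relevant for compression
    # so that NNCF can trace it as a model input.
    batch = args[0]
    batch['image'] = nncf_model_input(batch['image'])
    # Return the (args, kwargs) pair, identical to the input apart from the wrapped tensor.
    return args, kwargs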
def main_worker_binarization(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    original_model = copy.deepcopy(model)

    compression_ctrl, model = create_compressed_model(model, config)
    if not isinstance(compression_ctrl, BinarizationController):
        raise RuntimeError("The binarization sample worker may only be run with the binarization algorithm!")

    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_ctrl.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    binarization_config = compression_config if isinstance(compression_config, dict) else compression_config[0]
    optimizer = get_binarization_optimizer(params_to_optimize, binarization_config)
    optimizer_scheduler = BinarizationOptimizerScheduler(optimizer, binarization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_ctrl, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model, config, optimizer, optimizer_scheduler,
                                   kd_loss_calculator, compression_ctrl)

    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_dataset, val_dataset = create_datasets(config)
    train_loader, train_sampler, val_loader = create_data_loaders(config, train_dataset, val_dataset)

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_ctrl.initialize(data_loader=train_loader, criterion=criterion)
        batch_multiplier = (binarization_config.get("params", {})).get("batch_multiplier", 1)
        train_bin(config, compression_ctrl, model, criterion, is_inception, optimizer_scheduler, model_name,
                  optimizer, train_loader, train_sampler, val_loader, kd_loss_calculator, batch_multiplier,
                  best_acc1)
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            dump_graphs=True) \
        -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.

    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
        source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
        to the model.
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
        building.
    :param dummy_forward_fn: will be used instead of a *forward* function call to build
        the internal graph representation via tracing. Specifying this is useful when the original training pipeline
        has special formats of data loader output or has additional *forward* arguments other than input tensors.
        Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
        to the shape specified in the config object.
    :param dump_graphs: Whether to also dump the internal graph representation of the original and compressed
        models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
        is an instance of CompositeCompressionController) and the model ready for compression parameter training,
        wrapped as an object of NNCFNetwork."""

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(custom_forward_fn=create_dummy_forward_fn(input_info_list,
                                                                                   with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.dump_graph(osp.join(config.get("log_dir", "."), "original_graph.dot"), extended=True)

    if is_debug():
        set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching', [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(model, input_infos=input_info_list,
                                   dummy_forward_fn=dummy_forward_fn,
                                   ignored_scopes=ignored_scopes,
                                   target_scopes=target_scopes,
                                   scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    if dump_graphs and is_main_process() and compression_algo_builder_list:
        if dummy_forward_fn is None:
            compressed_graph_builder = GraphBuilder(custom_forward_fn=create_dummy_forward_fn(
                input_info_list, with_input_tracing=False))
        else:
            compressed_graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        graph = compressed_graph_builder.build_graph(compressed_model, compressed_model.get_tracing_context())
        graph.dump_graph(osp.join(config.get("log_dir", "."), "compressed_graph.dot"), extended=True)

    if resuming_state_dict is not None:
        load_state(compressed_model, resuming_state_dict, is_resume=True)

    return compression_ctrl, compressed_model
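# A minimal sketch of a custom `dummy_forward_fn` as described in the docstring above, assuming a
# classification model whose forward() takes a single image tensor; the input shape below is an
# illustrative assumption and would normally match the shape given in the NNCF config.
import torch

def dummy_forward_fn_example(model):
    # Run one forward pass with a mock tensor so NNCF can trace the model graph
    # without needing a real data loader batch.
    mock_input = torch.zeros(1, 3, 224, 224)
    return model(mock_input)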