def create_nncf_model_and_single_algo_builder(model: Module, config: NNCFConfig,
                                              dummy_forward_fn: Callable[[Module], Any] = None,
                                              wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None) \
        -> Tuple[NNCFNetwork, PTCompressionAlgorithmBuilder]:
    assert isinstance(config, NNCFConfig)
    NNCFConfig.validate(config)
    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching', [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(model,
                                   input_infos=input_info_list,
                                   dummy_forward_fn=dummy_forward_fn,
                                   wrap_inputs_fn=wrap_inputs_fn,
                                   ignored_scopes=ignored_scopes,
                                   target_scopes=target_scopes,
                                   scopes_without_shape_matching=scopes_without_shape_matching)

    algo_names = extract_algorithm_names(config)
    assert len(algo_names) == 1
    algo_name = next(iter(algo_names))
    builder_cls = PT_COMPRESSION_ALGORITHMS.get(algo_name)
    # Note: the second returned element is the algorithm *builder*, not a controller
    builder = builder_cls(config, should_init=True)
    return compressed_model, builder
def create_test_dataloaders(config, dataset_dir):
    input_info = create_input_infos(config)[0]
    image_size = input_info.shape[-1]
    batch_size = input_info.shape[0]
    normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))

    train_transforms = transforms.Compose([
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        normalize,
    ])

    dummy_config = type('dummy', (object,), {'dataset_dir': dataset_dir})()
    train_dataset = create_cifar(dummy_config, dataset_config='cifar10', is_train=True, transform=train_transforms)

    # Do not set num_workers > 0 here - random hangs occur during pytest runs of this file
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               pin_memory=True,
                                               drop_last=True)
    return train_loader, train_dataset
def get_testing_dataset(dataset_name, path_to_annotations, path_to_imgs, config):
    # For VOC, path_to_imgs = path_to_annotations = voc_root
    assert dataset_name in ['voc', 'coco']
    preprocessing = get_preprocessing(config)
    input_info_list = create_input_infos(config)
    image_size = input_info_list[0].shape[-1]
    transform = BaseTransform(image_size, preprocessing.mean, preprocessing.std, preprocessing.normalize_coef)
    if dataset_name == 'voc':
        testing_dataset = VOCDetection(path_to_imgs, [('2007', 'test')],
                                       transform=transform,
                                       target_transform=VOCAnnotationTransform(keep_difficult=True),
                                       return_image_info=True,
                                       rgb=preprocessing.rgb)
    elif dataset_name == 'coco':
        testing_dataset = COCODataset(path_to_annotations, path_to_imgs,
                                      transform=transform,
                                      scale_bboxes=False,
                                      return_image_info=True,
                                      rgb=preprocessing.rgb)
    return testing_dataset
def create_rank_dataloader(config, rank, num_samples=10, batch_size=3):
    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = torch.utils.data.DataLoader(RankDatasetMock(input_sample_size[1:], rank, num_samples),
                                              batch_size=batch_size,
                                              num_workers=0,  # workaround
                                              shuffle=False,
                                              drop_last=True)
    return data_loader
def sr_dummy_forward_fn(model_, input_sample_sizes: Tuple[List[int]]):
    device = next(model_.parameters()).device
    config = {'input_info': [{"sample_size": sizes} for sizes in input_sample_sizes]}
    input_info_list = create_input_infos(config)
    tensor_list = [create_mock_tensor(info, device) for info in input_info_list]
    args = (tuple(tensor_list),)
    args, _ = sr_wrap_inputs_fn(args, {})
    return nncf_model_output(model_(*args))
def create_test_quantization_env(model_creator=BasicConvTestModel, input_info_cfg=None) -> QuantizationEnv:
    if input_info_cfg is None:
        input_info_cfg = {"input_info": {"sample_size": [1, 1, 4, 4]}}

    model = model_creator()
    nncf_network = NNCFNetwork(model, input_infos=create_input_infos(input_info_cfg))
    hw_config_type = HWConfigType.VPU
    hw_config_path = HWConfig.get_path_to_hw_config(hw_config_type)
    hw_config = PTHWConfig.from_json(hw_config_path)
    setup = PropagationBasedQuantizerSetupGenerator(NNCFConfig(), nncf_network, hw_config=hw_config).generate_setup()
    dummy_multi_setup = MultiConfigQuantizerSetup.from_single_config_setup(setup)
    for qp in dummy_multi_setup.quantization_points.values():
        qconf_constraint_list = []
        qconf = qp.possible_qconfigs[0]
        bit_set = [8, 4, 2] if 'conv' in str(qp.insertion_point) else [8, 4]
        for bits in bit_set:
            adj_qconf = deepcopy(qconf)
            adj_qconf.num_bits = bits
            qconf_constraint_list.append(adj_qconf)
        qp.possible_qconfigs = qconf_constraint_list

    experimental_builder = ExperimentalQuantizationBuilder(dummy_multi_setup, setup, {}, hw_config)
    experimental_builder.apply_to(nncf_network)
    # pylint:disable=line-too-long
    experimental_ctrl = experimental_builder.build_controller(nncf_network)
    data_loader = create_ones_mock_dataloader(input_info_cfg)
    constraints = HardwareQuantizationConstraints()
    for qid, qp_id_set in experimental_ctrl.module_id_to_qp_id_translation_dict.items():
        first_qp_id_for_this_quantizer_module = next(iter(qp_id_set))
        qconfigs = dummy_multi_setup.quantization_points[first_qp_id_for_this_quantizer_module].possible_qconfigs
        constraints.add(qid, qconfigs)

    return QuantizationEnv(nncf_network,
                           experimental_ctrl,
                           constraints,
                           data_loader,
                           lambda *x: 0,
                           hw_config_type=HWConfigType.VPU,
                           params=QuantizationEnvParams(compression_ratio=0.15,
                                                        eval_subset_ratio=1.0,
                                                        skip_constraint=False,
                                                        performant_bw=False,
                                                        finetune=False,
                                                        bits=[2, 4, 8],
                                                        dump_init_precision_data=False))
def create_any_mock_dataloader(dataset_cls: type, config: NNCFConfig, num_samples: int = 1,
                               batch_size: int = 1) -> DataLoader:
    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = DataLoader(dataset_cls(input_sample_size[1:], num_samples),
                             batch_size=batch_size,
                             num_workers=0,  # Workaround
                             shuffle=False,
                             drop_last=True)
    return data_loader
def test_staged_scheduler_with_hawq():
    config = get_squeezenet_quantization_config()
    config['compression'].update({
        'params': {
            "activations_quant_start_epoch": 1,
            "weights_quant_start_epoch": 2,
        },
        'initializer': {
            'range': {
                'num_init_samples': 1
            },
            'precision': {
                "type": "hawq",
                "num_data_points": 1,
                "iter_number": 1,
                "tolerance": 1
            }
        }
    })
    num_classes = 10
    model = squeezenet1_1(num_classes=num_classes, dropout=0)

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = DataLoader(HawqDatasetMock(input_sample_size[1:], num_classes),
                             batch_size=1,
                             num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
                             shuffle=False)
    criterion = nn.CrossEntropyLoss().cuda()
    config = register_default_init_args(config, data_loader, criterion=criterion)

    model, algo = create_compressed_model_and_algo_for_test(model, config)
    scheduler = algo.scheduler

    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    for wq_info in algo.weight_quantizers.values():
        assert not wq_info.quantizer_module_ref.is_enabled_quantization()
    for aq_info in algo.non_weight_quantizers.values():
        assert aq_info.quantizer_module_ref.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert module.is_enabled_quantization()
def test_compressed_graph_models_hw(desc, hw_config_type):
    model = desc.model_builder()
    config = get_basic_quantization_config_with_hw_config_type(hw_config_type.value,
                                                               input_sample_size=desc.input_sample_sizes)
    input_info_list = create_input_infos(config)
    compressed_model = NNCFNetwork(model, input_infos=input_info_list)

    # pylint:disable=protected-access
    quantization_builder = QuantizationBuilder(config, should_init=False)
    single_config_quantizer_setup = quantization_builder._get_quantizer_setup(compressed_model)
    sketch_graph = compressed_model.get_original_graph()

    potential_quantizer_graph = prepare_potential_quantizer_graph(sketch_graph, single_config_quantizer_setup)
    check_nx_graph(potential_quantizer_graph, desc.dot_filename, _case_dir(hw_config_type.value),
                   sort_dot_graph=False)
def test_staged_scheduler_with_range_init():
    config = get_squeezenet_quantization_config()
    config['compression'].update({
        'params': {
            "activations_quant_start_epoch": 1,
            "weights_quant_start_epoch": 2,
        },
        'initializer': {
            'range': {
                'num_init_samples': 1
            }
        }
    })
    register_bn_adaptation_init_args(config)
    model = squeezenet1_1(num_classes=10, dropout=0)

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = DataLoader(OnesDatasetMock(input_sample_size[1:]),
                             batch_size=1,
                             num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
                             shuffle=False)
    config.register_extra_structs([QuantizationRangeInitArgs(wrap_dataloader_for_init(data_loader))])

    model, algo = create_compressed_model_and_algo_for_test(model, config)
    scheduler = algo.scheduler

    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    for wq_info in algo.weight_quantizers.values():
        assert not wq_info.quantizer_module_ref.is_enabled_quantization()
    for aq_info in algo.non_weight_quantizers.values():
        assert aq_info.quantizer_module_ref.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert module.is_enabled_quantization()
def main():
    model_bin, model_xml = get_ir_paths(args.model, args.bin)

    config = NNCFConfig.from_json(args.config)

    input_infos_list = create_input_infos(config)
    image_size = input_infos_list[0].shape[-1]
    size = int(image_size / 0.875)

    print('IE version: {}'.format(get_version()))

    # NOTE: import torch only after loading IE into the plugin, to avoid an issue with PyTorch's built-in MKLDNN
    plugin = IEPlugin(device='CPU', plugin_dirs=args.cpu_plugin_dir)
    plugin.add_cpu_extension(os.path.join(args.cpu_plugin_dir, "libcpu_extension.so"))
    net = IENetwork(model=model_xml, weights=model_bin)
    exec_net = getExecNet(plugin, net)
    from torch.utils.data import DataLoader
    from torchvision import datasets
    from torchvision import transforms

    val_loader = DataLoader(
        datasets.ImageFolder(args.data, transforms.Compose([
            transforms.Resize(size),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])),
        batch_size=1, shuffle=False,
        num_workers=4, pin_memory=True)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    config['log_dir'] = args.output_dir

    infer_fn = partial(infer_ie_model, net=net)
    validate_general(val_loader, exec_net, infer_fn)

    validate_torch_model(os.path.join(args.output_dir, "PTH"), config=config, num_layers=args.num_layers,
                         dump=args.dump, val_loader=val_loader, cuda=args.cuda)
def create_model(config: SampleConfig, resuming_checkpoint: dict = None):
    input_info_list = create_input_infos(config.nncf_config)
    image_size = input_info_list[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size, config.num_classes, config)
    weights = config.get('weights')
    if weights:
        sd = torch.load(weights, map_location='cpu', pickle_module=restricted_pickle_module)
        sd = sd["state_dict"]
        load_state(ssd_net, sd)

    ssd_net.to(config.device)
    model_state_dict, compression_state = extract_model_and_compression_states(resuming_checkpoint)
    compression_ctrl, compressed_model = create_compressed_model(ssd_net, config.nncf_config, compression_state)
    if model_state_dict is not None:
        load_state(compressed_model, model_state_dict, is_resume=True)
    compressed_model, _ = prepare_model_for_execution(compressed_model, config)

    compressed_model.train()
    return compression_ctrl, compressed_model
def test_gnmt_quantization(_case_config):
    model = GNMT(vocab_size=32)
    model = replace_lstm(model)
    forward_fn_ = gnmt_forward_fn(seq_len=10, batch_size=3, vocab_size=32)

    config = get_basic_quantization_config(_case_config.quant_type)
    config["input_info"] = [
        {
            "sample_size": [3, 10],
            "type": "long"
        },
        {
            "sample_size": [3],
            "type": "long"
        },
        {
            "sample_size": [3, 10],
            "type": "long"
        }
    ]
    config["compression"].update({
        "ignored_scopes": ["GNMT/ResidualRecurrentEncoder[encoder]/Embedding[embedder]",
                           "GNMT/ResidualRecurrentDecoder[decoder]/Embedding[embedder]"]})

    compressed_model = NNCFNetwork(model,
                                   input_infos=create_input_infos(config),
                                   dummy_forward_fn=forward_fn_,
                                   wrap_inputs_fn=gnmt_wrap_inputs_fn,
                                   scopes_without_shape_matching=[
                                       'GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/'
                                       'BahdanauAttention[attn]'])

    builder = QuantizationBuilder(config, should_init=False)
    builder.apply_to(compressed_model)

    check_model_graph(compressed_model, 'gnmt_variable.dot', _case_config.graph_dir)
def create_compressed_model(model: Module,
                            config: NNCFConfig,
                            compression_state: Optional[Dict[str, Any]] = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            wrap_outputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            dump_graphs=True) \
        -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.

    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
        source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
        to the model.
    :param compression_state: representation of the entire compression state to unambiguously restore
        the compressed model. Includes builder and controller states.
    :param dummy_forward_fn: if supplied, will be used instead of a *forward* function call to build
        the internal graph representation via tracing. Specifying this is useful when the original training
        pipeline has special formats of data loader output or has additional *forward* arguments other than input
        tensors. Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors
        according to the shape specified in the config object. The dummy_forward_fn code MUST contain calls to
        nncf.nncf_model_input functions made with each compressed model input tensor in the underlying model's
        args/kwargs tuple, and these calls should be exactly the same as in the wrap_inputs_fn function code
        (see below); if dummy_forward_fn is specified, then wrap_inputs_fn also must be specified.
    :param wrap_inputs_fn: if supplied, will be used on the module's input arguments during a regular, non-dummy
        forward call before passing the inputs to the underlying compressed model. This is required if the model's
        input tensors that are important for compression are not supplied as arguments to the model's forward call
        directly, but instead are located in a container (such as a list), and the model receives the container as
        an argument. wrap_inputs_fn should take as input two arguments - the tuple of positional arguments to the
        underlying model's forward call, and a dict of keyword arguments to the same. The function should wrap each
        tensor among the supplied model's args and kwargs that is important for compression (e.g. quantization)
        with an nncf.nncf_model_input function, which is a no-operation function and marks the tensors as inputs
        to be traced by NNCF in the internal graph representation. Output is the tuple of (args, kwargs), where
        args and kwargs are the same as were supplied in input, but with each compression-relevant tensor in the
        original input wrapped as described above. Must be specified if dummy_forward_fn is specified.
    :param dump_graphs: Whether or not to dump the internal graph representation of the original
        and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
        is an instance of CompositeCompressionController) and the model ready for compression parameter training
        wrapped as an object of NNCFNetwork.
    """
    if dummy_forward_fn is not None and wrap_inputs_fn is None:
        raise ValueError("A custom dummy forward function was specified, but the corresponding input wrapping "
                         "function was not. In case a custom dummy forward function is specified for purposes of "
                         "NNCF graph building, then the wrap_inputs_fn parameter MUST also be specified and be "
                         "consistent with the input wrapping done in dummy_forward_fn.")

    is_legacy_model_state_dict = compression_state is not None and \
                                 BaseController.BUILDER_STATE not in compression_state and \
                                 BaseController.CONTROLLER_STATE not in compression_state
    maybe_convert_legacy_names_in_compress_state(compression_state)

    # Compress the model that will be deployed for inference on the target device. There is no need to compress
    # parts of the model that are used only at the training stage (e.g. AuxLogits of the Inception-v3 model) or
    # unused modules with weights. As a consequence, there is no need to worry about spoiling BN statistics, since
    # they are disabled in eval mode.
    model.eval()

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(custom_forward_fn=create_dummy_forward_fn(input_info_list,
                                                                                   with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.visualize_graph(osp.join(config.get("log_dir", "."), "original_graph.dot"))

    set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching', [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    original_model_accuracy = None
    if is_accuracy_aware_training(config):
        if config.has_extra_struct(ModelEvaluationArgs):
            evaluation_args = config.get_extra_struct(ModelEvaluationArgs)
            with torch.no_grad():
                original_model_accuracy = evaluation_args.eval_fn(model)
                nncf_logger.info("Non-compressed model accuracy = {}".format(original_model_accuracy))

    compressed_model = NNCFNetwork(model,
                                   input_infos=input_info_list,
                                   dummy_forward_fn=dummy_forward_fn,
                                   wrap_inputs_fn=wrap_inputs_fn,
                                   wrap_outputs_fn=wrap_outputs_fn,
                                   ignored_scopes=ignored_scopes,
                                   target_scopes=target_scopes,
                                   scopes_without_shape_matching=scopes_without_shape_matching,
                                   original_model_accuracy=original_model_accuracy)

    should_init = compression_state is None
    builder = create_compression_algorithm_builder(config, should_init)

    is_state_loadable = not is_legacy_model_state_dict and compression_state is not None
    if is_state_loadable:
        builder.load_state(compression_state[BaseController.BUILDER_STATE])
    builder.apply_to(compressed_model)
    compression_ctrl = builder.build_controller(compressed_model)
    if is_state_loadable:
        compression_ctrl.load_state(compression_state[BaseController.CONTROLLER_STATE])

    # Required to ensure that the model leaving create_compressed_model has a correct compressed graph.
    # In particular, this is currently required for correct functioning of RNNs.
    compressed_model.rebuild_graph()

    try:
        if is_legacy_model_state_dict:
            from nncf.torch import load_state
            state_dict_to_load = compression_state.get('state_dict', compression_state)
            load_state(compressed_model, state_dict_to_load, is_resume=True)
    finally:
        if dump_graphs and is_main_process():
            compressed_model_graph = compressed_model.get_graph()
            compressed_model_graph.visualize_graph(osp.join(config.get("log_dir", "."), "compressed_graph.dot"))

    # Synchronize all processes if run in distributed mode
    if is_dist_avail_and_initialized():
        try:
            barrier()
        # An exception can be raised while running barrier
        # if the backend is not in the supported list: https://pytorch.org/docs/stable/distributed.html
        except RuntimeError as err:
            nncf_logger.warning(err)
            nncf_logger.warning("NNCF continues to work, but does not guarantee that "
                                "the processes will finish compressing the model at the same time. "
                                "If your training pipeline requires the processes to be synchronized, please "
                                "pay attention to this error.")
            return compression_ctrl, compressed_model

    compressed_model.get_tracing_context().disable_trace_dynamic_graph()
    return compression_ctrl, compressed_model
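# A minimal usage sketch of the dummy_forward_fn / wrap_inputs_fn contract documented in create_compressed_model
# above. Everything prefixed with "example_" below, the toy DictInputModel, its input name and shape, and the
# quantization config are hypothetical and exist only to illustrate that both functions must wrap the same tensors
# with nncf_model_input; the nncf_model_input import path may differ between NNCF versions, and in a standalone
# script create_compressed_model would be imported as nncf.torch.create_compressed_model.
import torch
from torch import nn

from nncf import NNCFConfig
from nncf.torch.dynamic_graph.io_handling import nncf_model_input


class DictInputModel(nn.Module):
    """Toy model whose forward receives a dict, so the compression-relevant tensor must be wrapped explicitly."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3)

    def forward(self, inputs: dict):
        return self.conv(inputs['image'])


def example_wrap_inputs_fn(args, kwargs):
    # Wrap the tensor hidden inside the dict argument; everything else is passed through unchanged.
    inputs = args[0]
    inputs['image'] = nncf_model_input(inputs['image'])
    return args, kwargs


def example_dummy_forward_fn(model):
    # Build a mock input of the expected shape and route it through the same wrapping as example_wrap_inputs_fn,
    # so the traced graph sees exactly the same nncf_model_input calls as a regular forward.
    args = ({'image': torch.ones(1, 3, 32, 32)},)
    args, kwargs = example_wrap_inputs_fn(args, {})
    return model(*args, **kwargs)


def example_create_compressed_dict_input_model():
    # Initialization sample counts are set to 0 so the sketch does not need registered init data loaders.
    nncf_config = NNCFConfig.from_dict({
        "input_info": {"sample_size": [1, 3, 32, 32]},
        "compression": {
            "algorithm": "quantization",
            "initializer": {
                "range": {"num_init_samples": 0},
                "batchnorm_adaptation": {"num_bn_adaptation_samples": 0}
            }
        }
    })
    compression_ctrl, compressed_model = create_compressed_model(DictInputModel(),
                                                                 nncf_config,
                                                                 dummy_forward_fn=example_dummy_forward_fn,
                                                                 wrap_inputs_fn=example_wrap_inputs_fn)
    return compression_ctrl, compressed_model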