def test_export_stacked_bi_lstm(tmp_path):
    p = LSTMTestSizes(3, 3, 3, 3)
    patch_torch_operators()
    config = get_empty_config(input_sample_size=(1, p.hidden_size, p.input_size))
    config['compression'] = {'algorithm': 'quantization'}
    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')

    # TODO: batch_first=True fails with building graph: ambiguous call to mul or sigmoid
    test_rnn = NNCF_RNN('LSTM', input_size=p.input_size, hidden_size=p.hidden_size,
                        num_layers=2, bidirectional=True, batch_first=False)
    algo, model = create_compressed_model(test_rnn, config)

    test_path = str(tmp_path.joinpath('test.onnx'))
    algo.export_model(test_path)
    assert os.path.exists(test_path)

    onnx_num = 0
    model = onnx.load(test_path)
    # pylint: disable=no-member
    for node in model.graph.node:
        if node.op_type == 'FakeQuantize':
            onnx_num += 1
    assert onnx_num == 54
def test_can_restore_binary_mask_on_magnitude_quant_algo_resume(tmp_path):
    patch_torch_operators()
    config = get_empty_config()
    config["compression"] = [
        {"algorithm": "magnitude_sparsity", "weight_importance": "abs",
         "params": {"schedule": "multistep", "sparsity_levels": [0.3, 0.5]}},
        {"algorithm": "quantization"}]
    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')
    _, model = create_compressed_model(MagnitudeTestModel(), config)
    # load_state doesn't support CPU + Quantization
    sparse_model = torch.nn.DataParallel(model)
    sparse_model.cuda()
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    reset_context('orig')
    reset_context('quantized_graphs')
    config = get_empty_config()
    config.log_dir = str(tmp_path)
    config["compression"] = [{"algorithm": "const_sparsity"}, {"algorithm": "quantization"}]
    _, const_sparse_model = create_compressed_model(MagnitudeTestModel(), config)

    load_state(const_sparse_model, sparse_model.state_dict())

    op = const_sparse_model.get_nncf_wrapped_module().conv1.pre_ops['0']
    check_equal(ref_mask_1, op.operand.binary_mask)

    op = const_sparse_model.get_nncf_wrapped_module().conv2.pre_ops['0']
    check_equal(ref_mask_2, op.operand.binary_mask)
def create_model(config):
    input_info_list = create_input_infos(config)
    image_size = input_info_list[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size, config.num_classes, config)
    compression_algo, ssd_net = create_compressed_model(ssd_net, config)
    ssd_net = compression_algo.model
    weights = config.get('weights')
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(ssd_net, sd)

    ssd_net.train()
    model, _ = prepare_model_for_execution(ssd_net, config)
    return compression_algo, model
def test_export_lstm_cell(tmp_path):
    patch_torch_operators()
    config = get_empty_config(model_size=1, input_sample_size=(1, 1))
    config['compression'] = {'algorithm': 'quantization'}
    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')

    algo, model = create_compressed_model(LSTMCellNNCF(1, 1), config)

    test_path = str(tmp_path.joinpath('test.onnx'))
    algo.export_model(test_path)
    assert os.path.exists(test_path)

    onnx_num = 0
    model = onnx.load(test_path)
    # pylint: disable=no-member
    for node in model.graph.node:
        if node.op_type == 'FakeQuantize':
            onnx_num += 1
    assert onnx_num == 13
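# Hedged sketch (not part of the original test suite): the FakeQuantize-counting loops in the two
# export tests above could be factored into a small helper. The name `count_onnx_nodes_of_type` is
# hypothetical; the body only relies on the public `onnx.load` API and the `op_type` field of
# graph nodes, which the tests above already use.
def count_onnx_nodes_of_type(onnx_path, op_type='FakeQuantize'):
    import onnx  # assumes the `onnx` package is available, as in the export tests above
    onnx_model = onnx.load(onnx_path)
    # pylint: disable=no-member
    return sum(1 for node in onnx_model.graph.node if node.op_type == op_type)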
def test_load_state_sets_initialized_flag(tmp_path):
    config = get_basic_quantization_config()
    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')
    _, model = create_compressed_model(TwoConvTestModel(), config)

    load_state(model, {
        'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([1.0]),  # quantizer of 1st conv's weights
        'module.features.1.0.pre_ops.0.op.scale': torch.tensor([1.0])  # quantizer of 2nd conv's weights
    })

    quantizers = get_all_modules_by_type(model, 'SymmetricQuantizer')
    for name, module in quantizers.items():
        if 'activation_quantizers' in name or 'UpdateInputs' in name:
            assert not module.initialized
        else:
            assert module.initialized
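# Hedged sketch: the partial state dict in the test above is hand-written; when restoring only the
# weight-quantizer parameters from a full checkpoint, a filtered dict can be built the same way
# before calling load_state. The helper name and the 'pre_ops' key filter are illustrative
# assumptions, not an NNCF API.
def filter_state_dict_by_key(full_state_dict, key_substring='pre_ops'):
    return {key: value for key, value in full_state_dict.items() if key_substring in key}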
def test_model_can_be_loaded_with_resume(_params, tmp_path):
    p = _params
    config_path = p['nncf_config_path']
    checkpoint_path = p['checkpoint_path']

    config = Config.from_json(str(config_path))
    config.execution_mode = p['execution_mode']

    config.current_gpu = 0
    config.log_dir = str(tmp_path)
    config.device = get_device(config)
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        config.dist_url = "tcp://127.0.0.1:9898"
        config.dist_backend = "nccl"
        config.rank = 0
        config.world_size = 1
        configure_distributed(config)

    model_name = config['model']
    model = load_model(model_name,
                       pretrained=False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))

    patch_torch_operators()
    compression_algo, model = create_compressed_model(model, config)
    model, _ = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_algo.distributed()

    reset_context('orig')
    reset_context('quantized_graphs')

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    compression_algo, model = create_compressed_model(model, config)
    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_algo.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, compression_algo, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model, config, optimizer, compression_algo)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_loader, train_sampler, val_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        print_statistics(compression_algo.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        train(config, compression_algo, model, criterion, is_inception, lr_scheduler, model_name, optimizer,
              train_loader, train_sampler, val_loader, best_acc1)
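# Hedged sketch: `resume_from_checkpoint` above is the sample's own helper with its own signature.
# A minimal illustration of the same idea with a plain torch checkpoint dict could look like the
# following. The helper name and the 'state_dict'/'optimizer'/'best_acc1' keys are assumptions for
# illustration; `load_state(..., is_resume=True)` is used the same way in the resume test above.
def minimal_resume(model, optimizer, checkpoint_path):
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint.get('best_acc1', 0)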
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)
        print(config)

    config.device = get_device(config)

    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    weights = config.get('weights')
    model = load_model(config.model,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}))
    compression_algo, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)

    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_algo.distributed()

    resuming_checkpoint = config.resuming_checkpoint

    if resuming_checkpoint is not None:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint, config.device,
                                compression_scheduler=compression_algo.scheduler)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.mode.lower() == 'test':
        print(model)
        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        print("Trainable parameter count: {params}".format(params=params))

        model = model.to(config.device)

        loaders, w_class = load_dataset(dataset, config)
        _, val_loader = loaders
        test(model, val_loader, w_class, color_encoding, config)

        print_statistics(compression_algo.statistics())
    elif config.mode.lower() == 'train':
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        model = \
            train(model, model_without_dp, compression_algo, train_loader, val_loader, w_class, color_encoding,
                  config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a valid choice for execution mode.".format(config.mode))
def test_number_of_calling_fq_for_gnmt(self, tmp_path):
    torch.cuda.set_device(0)
    device = torch.device('cuda')
    batch_first = False
    vocab_size = 32000
    model_config = {'hidden_size': 100,
                    'vocab_size': vocab_size,
                    'num_layers': 4,
                    'dropout': 0.2,
                    'batch_first': batch_first,
                    'share_embedding': True,
                    }
    batch_size = 128
    sequence_size = 50
    input_sample_size = (batch_size, sequence_size) if batch_first else (sequence_size, batch_size)
    patch_torch_operators()
    config = get_empty_config(input_sample_size=input_sample_size)
    config['compression'] = \
        {'algorithm': 'quantization',
         'quantize_inputs': True,
         'quantizable_subgraph_patterns': [["linear", "__add__"],
                                           ["sigmoid", "__mul__", "__add__"],
                                           ["__add__", "tanh", "__mul__"],
                                           ["sigmoid", "__mul__"]],
         'scopes_without_shape_matching':
             ['GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/BahdanauAttention[attn]'],
         'disable_function_quantization_hooks': True}
    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')

    model = GNMT(**model_config)
    model = replace_lstm(model)
    model.to(device)

    def dummy_forward_fn(model, seq_len=sequence_size):
        def gen_packed_sequence():
            seq_list = []
            seq_lens = torch.LongTensor(batch_size).random_(1, seq_len + 1)
            seq_lens = torch.sort(seq_lens, descending=True).values
            for seq_size in seq_lens:
                seq_list.append(torch.LongTensor(seq_size.item()).random_(1, vocab_size).to(device))
            padded_seq_batch = torch.nn.utils.rnn.pad_sequence(seq_list, batch_first=batch_first)
            return padded_seq_batch, seq_lens

        x_data, seq_lens = gen_packed_sequence()
        input_encoder = x_data
        input_enc_len = seq_lens.to(device)
        input_decoder = gen_packed_sequence()[0]
        model.forward(input_encoder, input_enc_len, input_decoder)

    _, model = create_compressed_model(model, config, dummy_forward_fn)
    model.to(device)

    class Counter:
        def __init__(self):
            self.count = 0

        def next(self):
            self.count += 1

    def hook(model, input_, counter):
        counter.next()

    counters = {}
    for name, quantizer in model.all_quantizations.items():
        counter = Counter()
        counters[name] = counter
        quantizer.register_forward_pre_hook(partial(hook, counter=counter))

    with context('quantized_graphs') as ctx:
        dummy_forward_fn(model)
        assert ctx.graph.get_nodes_count() == 239
        assert len(counters) == 68
        for name, counter in counters.items():
            if 'cell' in name or "LSTMCellForwardNNCF" in name:
                assert counter.count == sequence_size, name
            else:
                assert counter.count == 1, name

        new_seq_len = int(sequence_size / 2)
        dummy_forward_fn(model, new_seq_len)
        assert ctx.graph.get_nodes_count() == 239
        assert len(counters) == 68
        for name, counter in counters.items():
            if 'cell' in name or "LSTMCellForwardNNCF" in name:
                assert counter.count == sequence_size + new_seq_len, name
            else:
                assert counter.count == 2, name
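# Hedged sketch: the GNMT test above needs a custom forward function because its inputs are
# variable-length packed sequences. For a model with a plain fixed-shape tensor input, the forward
# function passed as the third argument to create_compressed_model can be much simpler, for
# example as below. The (1, 3, 224, 224) input shape is an illustrative assumption.
def simple_dummy_forward_fn(model):
    dummy_input = torch.randn(1, 3, 224, 224).to(next(model.parameters()).device)
    model(dummy_input)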
def test_number_of_calling_fq_for_lstm(self, tmp_path):
    p = LSTMTestSizes(1, 1, 1, 5)
    num_layers = 2
    bidirectional = True
    num_directions = 2 if bidirectional else 1
    bias = True
    batch_first = False
    patch_torch_operators()
    config = get_empty_config(input_sample_size=(p.seq_length, p.batch, p.input_size))
    config['compression'] = {'algorithm': 'quantization', 'quantize_inputs': True}
    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')
    test_data = TestLSTMCell.generate_lstm_data(p, num_layers, num_directions, bias=bias, batch_first=batch_first)

    test_rnn = NNCF_RNN('LSTM', input_size=p.input_size, hidden_size=p.hidden_size, num_layers=num_layers,
                        bidirectional=bidirectional, bias=bias, batch_first=batch_first)
    TestLSTM.set_ref_lstm_weights(test_data, test_rnn, num_layers, num_directions, bias)
    test_hidden = TestLSTM.get_test_lstm_hidden(test_data)

    _ = reset_context('orig')
    _ = reset_context('quantized_graphs')
    _, model = create_compressed_model(test_rnn, config)

    class Counter:
        def __init__(self):
            self.count = 0

        def next(self):
            self.count += 1

    def hook(model, input_, counter):
        counter.next()

    counters = {}
    for name, quantizer in model.all_quantizations.items():
        counter = Counter()
        counters[name] = counter
        quantizer.register_forward_pre_hook(partial(hook, counter=counter))

    with context('quantized_graphs') as ctx:
        _ = model(test_data.x, test_hidden)
        assert ctx.graph.get_nodes_count() == 110
        ctx.graph.dump_graph(os.path.join(config.log_dir, "compressed_graph_next.dot"))

    assert len(counters) == 54
    for counter in counters.values():
        assert counter.count == p.seq_length
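# Hedged sketch: the Counter/hook pattern used by the two tests above is plain PyTorch and works on
# any module. The helper below is an illustrative, self-contained variant that counts forward calls
# of every submodule via register_forward_pre_hook; the name attach_call_counters is hypothetical.
def attach_call_counters(module):
    counts = {}

    def make_hook(name):
        def _hook(_module, _inputs):
            counts[name] += 1
        return _hook

    for name, submodule in module.named_modules():
        counts[name] = 0
        submodule.register_forward_pre_hook(make_hook(name))
    return counts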
def main_worker_binarization(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    original_model = copy.deepcopy(model)

    compression_algo, model = create_compressed_model(model, config)
    if not isinstance(compression_algo, Binarization):
        raise RuntimeError("The binarization sample worker may only be run with the binarization algorithm!")

    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_algo.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    binarization_config = compression_config if isinstance(compression_config, dict) else compression_config[0]
    optimizer = get_binarization_optimizer(params_to_optimize, binarization_config)
    optimizer_scheduler = BinarizationOptimizerScheduler(optimizer, binarization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_algo, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model, config, optimizer, optimizer_scheduler,
                                   kd_loss_calculator, compression_algo)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_loader, train_sampler, val_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        print_statistics(compression_algo.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)

        batch_multiplier = (binarization_config.get("params", {})).get("batch_multiplier", 1)
        train_bin(config, compression_algo, model, criterion, is_inception, optimizer_scheduler, model_name,
                  optimizer, train_loader, train_sampler, val_loader, kd_loss_calculator, batch_multiplier,
                  best_acc1)
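# Hedged sketch: `batch_multiplier` above is consumed inside `train_bin`; a common way such a
# multiplier is realized is plain gradient accumulation, illustrated below. This is a generic
# sketch of the technique, not the sample's actual training loop, and the helper name is
# hypothetical.
def train_step_with_accumulation(model, criterion, optimizer, batches, batch_multiplier):
    optimizer.zero_grad()
    for i, (images, targets) in enumerate(batches):
        # scale the loss so the accumulated gradient matches a single large batch
        loss = criterion(model(images), targets) / batch_multiplier
        loss.backward()
        if (i + 1) % batch_multiplier == 0:
            optimizer.step()
            optimizer.zero_grad()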