Example #1
def test_export_stacked_bi_lstm(tmp_path):
    p = LSTMTestSizes(3, 3, 3, 3)
    patch_torch_operators()
    config = get_empty_config(input_sample_size=(1, p.hidden_size,
                                                 p.input_size))
    config['compression'] = {'algorithm': 'quantization'}

    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')
    # TODO: batch_first=True fails with building graph: ambiguous call to mul or sigmoid
    test_rnn = NNCF_RNN('LSTM',
                        input_size=p.input_size,
                        hidden_size=p.hidden_size,
                        num_layers=2,
                        bidirectional=True,
                        batch_first=False)
    algo, model = create_compressed_model(test_rnn, config)

    test_path = str(tmp_path.joinpath('test.onnx'))
    algo.export_model(test_path)
    assert os.path.exists(test_path)

    onnx_num = 0
    model = onnx.load(test_path)
    # pylint: disable=no-member
    for node in model.graph.node:
        if node.op_type == 'FakeQuantize':
            onnx_num += 1
    assert onnx_num == 54
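
The ONNX node-counting loop above also appears in Example #4; a minimal sketch of how it could be factored into a shared helper (the helper name is hypothetical, not part of NNCF):

import onnx

def count_onnx_ops(onnx_path, op_type='FakeQuantize'):
    # Hypothetical helper: count nodes of a given op type in an exported ONNX graph.
    model = onnx.load(onnx_path)
    # pylint: disable=no-member
    return sum(1 for node in model.graph.node if node.op_type == op_type)

# e.g. assert count_onnx_ops(test_path) == 54 for the stacked bidirectional LSTM above
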
Example #2
def test_can_restore_binary_mask_on_magnitude_quant_algo_resume(tmp_path):
    patch_torch_operators()
    config = get_empty_config()
    config["compression"] = [{
        "algorithm": "magnitude_sparsity",
        "weight_importance": "abs",
        "params": {
            "schedule": "multistep",
            "sparsity_levels": [0.3, 0.5]
        }
    }, {
        "algorithm": "quantization"
    }]
    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')
    _, model = create_compressed_model(MagnitudeTestModel(), config)
    # load_state doesn't support CPU + Quantization
    sparse_model = torch.nn.DataParallel(model)
    sparse_model.cuda()
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    reset_context('orig')
    reset_context('quantized_graphs')
    config = get_empty_config()
    config.log_dir = str(tmp_path)
    config["compression"] = [{
        "algorithm": "const_sparsity"
    }, {
        "algorithm": "quantization"
    }]
    _, const_sparse_model = create_compressed_model(MagnitudeTestModel(),
                                                    config)

    load_state(const_sparse_model, sparse_model.state_dict())

    op = const_sparse_model.get_nncf_wrapped_module().conv1.pre_ops['0']
    check_equal(ref_mask_1, op.operand.binary_mask)

    op = const_sparse_model.get_nncf_wrapped_module().conv2.pre_ops['0']
    check_equal(ref_mask_2, op.operand.binary_mask)
Example #3
def create_model(config):
    input_info_list = create_input_infos(config)
    image_size = input_info_list[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size, config.num_classes, config)
    compression_algo, ssd_net = create_compressed_model(ssd_net, config)
    ssd_net = compression_algo.model
    weights = config.get('weights')
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(ssd_net, sd)
    ssd_net.train()
    model, _ = prepare_model_for_execution(ssd_net, config)
    return compression_algo, model
Example #4
def test_export_lstm_cell(tmp_path):
    patch_torch_operators()
    config = get_empty_config(model_size=1, input_sample_size=(1, 1))
    config['compression'] = {'algorithm': 'quantization'}

    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')
    algo, model = create_compressed_model(LSTMCellNNCF(1, 1), config)

    test_path = str(tmp_path.joinpath('test.onnx'))
    algo.export_model(test_path)
    assert os.path.exists(test_path)

    onnx_num = 0
    model = onnx.load(test_path)
    # pylint: disable=no-member
    for node in model.graph.node:
        if node.op_type == 'FakeQuantize':
            onnx_num += 1
    assert onnx_num == 13
Example #5
def test_load_state_sets_initialized_flag(tmp_path):
    config = get_basic_quantization_config()
    config.log_dir = str(tmp_path)
    reset_context('orig')
    reset_context('quantized_graphs')
    _, model = create_compressed_model(TwoConvTestModel(), config)

    load_state(
        model,
        {
            'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor(
                [1.0]),  # quantizer of 1st conv's weights
            'module.features.1.0.pre_ops.0.op.scale': torch.tensor(
                [1.0])  # quantizer of 2nd conv's weights
        })

    quantizers = get_all_modules_by_type(model, 'SymmetricQuantizer')
    for name, module in quantizers.items():
        if 'activation_quantizers' in name or 'UpdateInputs' in name:
            assert not module.initialized
        else:
            assert module.initialized
Example #6
def test_model_can_be_loaded_with_resume(_params, tmp_path):
    p = _params
    config_path = p['nncf_config_path']
    checkpoint_path = p['checkpoint_path']

    config = Config.from_json(str(config_path))
    config.execution_mode = p['execution_mode']

    config.current_gpu = 0
    config.log_dir = str(tmp_path)
    config.device = get_device(config)
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        config.dist_url = "tcp://127.0.0.1:9898"
        config.dist_backend = "nccl"
        config.rank = 0
        config.world_size = 1
        configure_distributed(config)

    model_name = config['model']
    model = load_model(model_name,
                       pretrained=False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))

    patch_torch_operators()
    compression_algo, model = create_compressed_model(model, config)
    model, _ = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_algo.distributed()

    reset_context('orig')
    reset_context('quantized_graphs')
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
Example #7
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    compression_algo, model = create_compressed_model(model, config)
    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))
    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_algo.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, compression_algo, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model,
                                   config, optimizer, compression_algo)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_loader, train_sampler, val_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        print_statistics(compression_algo.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        train(config, compression_algo, model, criterion, is_inception,
              lr_scheduler, model_name, optimizer, train_loader, train_sampler,
              val_loader, best_acc1)
Example #8
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    print(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    weights = config.get('weights')
    model = load_model(config.model,
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}))
    compression_algo, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)

    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_algo.distributed()

    resuming_checkpoint = config.resuming_checkpoint

    if resuming_checkpoint is not None:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint, config.device,
                                compression_scheduler=compression_algo.scheduler)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.mode.lower() == 'test':
        print(model)
        model_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        print("Trainable parameter count: {params}".format(params=params))

        model = model.to(config.device)
        loaders, w_class = load_dataset(dataset, config)
        _, val_loader = loaders
        test(model, val_loader, w_class, color_encoding, config)
        print_statistics(compression_algo.statistics())
    elif config.mode.lower() == 'train':
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        model = \
            train(model, model_without_dp, compression_algo, train_loader, val_loader, w_class, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
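
The trainable-parameter count printed in the test branch above is a common PyTorch pattern; a minimal self-contained sketch (the helper name and example layer are illustrative, not from the sample code):

import numpy as np
import torch.nn as nn

def count_trainable_params(model):
    # Sum the element counts of every parameter that requires gradients.
    return sum(int(np.prod(p.size())) for p in model.parameters() if p.requires_grad)

# count_trainable_params(nn.Linear(10, 10)) == 110  (100 weights + 10 biases)
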
Example #9
    def test_number_of_calling_fq_for_gnmt(self, tmp_path):
        torch.cuda.set_device(0)
        device = torch.device('cuda')
        batch_first = False
        vocab_size = 32000
        model_config = {
            'hidden_size': 100,
            'vocab_size': vocab_size,
            'num_layers': 4,
            'dropout': 0.2,
            'batch_first': batch_first,
            'share_embedding': True,
        }
        batch_size = 128
        sequence_size = 50
        input_sample_size = (batch_size,
                             sequence_size) if batch_first else (sequence_size,
                                                                 batch_size)
        patch_torch_operators()
        config = get_empty_config(input_sample_size=input_sample_size)
        config['compression'] = \
            {'algorithm': 'quantization',
             'quantize_inputs': True,
             'quantizable_subgraph_patterns': [["linear", "__add__"],
                                               ["sigmoid", "__mul__", "__add__"],
                                               ["__add__", "tanh", "__mul__"],
                                               ["sigmoid", "__mul__"]],
             'scopes_without_shape_matching':
                 ['GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/BahdanauAttention[attn]'],
             'disable_function_quantization_hooks': True}

        config.log_dir = str(tmp_path)
        reset_context('orig')
        reset_context('quantized_graphs')

        model = GNMT(**model_config)
        model = replace_lstm(model)
        model.to(device)

        def dummy_forward_fn(model, seq_len=sequence_size):
            def gen_packed_sequence():
                seq_list = []
                seq_lens = torch.LongTensor(batch_size).random_(1, seq_len + 1)
                seq_lens = torch.sort(seq_lens, descending=True).values
                for seq_size in seq_lens:
                    seq_list.append(
                        torch.LongTensor(seq_size.item()).random_(
                            1, vocab_size).to(device))
                padded_seq_batch = torch.nn.utils.rnn.pad_sequence(
                    seq_list, batch_first=batch_first)
                return padded_seq_batch, seq_lens

            x_data, seq_lens = gen_packed_sequence()
            input_encoder = x_data
            input_enc_len = seq_lens.to(device)
            input_decoder = gen_packed_sequence()[0]
            model.forward(input_encoder, input_enc_len, input_decoder)

        _, model = create_compressed_model(model, config, dummy_forward_fn)
        model.to(device)

        class Counter:
            def __init__(self):
                self.count = 0

            def next(self):
                self.count += 1

        def hook(model, input_, counter):
            counter.next()

        counters = {}
        for name, quantizer in model.all_quantizations.items():
            counter = Counter()
            counters[name] = counter
            quantizer.register_forward_pre_hook(partial(hook, counter=counter))
        with context('quantized_graphs') as ctx:
            dummy_forward_fn(model)
            assert ctx.graph.get_nodes_count() == 239
            assert len(counters) == 68
            # cell-level quantizers fire once per time step; the rest fire once per forward pass
            for name, counter in counters.items():
                if 'cell' in name or "LSTMCellForwardNNCF" in name:
                    assert counter.count == sequence_size, name
                else:
                    assert counter.count == 1, name
            new_seq_len = int(sequence_size / 2)
            dummy_forward_fn(model, new_seq_len)
            assert ctx.graph.get_nodes_count() == 239
            assert len(counters) == 68
            # after the second forward pass, counts accumulate across both runs
            for name, counter in counters.items():
                if 'cell' in name or "LSTMCellForwardNNCF" in name:
                    assert counter.count == sequence_size + new_seq_len, name
                else:
                    assert counter.count == 2, name
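
The Counter/hook pattern above relies on nn.Module.register_forward_pre_hook and functools.partial; a standalone sketch of the same idea on a plain module (all names here are illustrative, not NNCF APIs):

from functools import partial

import torch
import torch.nn as nn

class CallCounter:
    def __init__(self):
        self.count = 0

    def next(self):
        self.count += 1

def counting_hook(module, inputs, counter):
    # Forward pre-hooks receive (module, inputs); the counter is bound via partial.
    counter.next()

layer = nn.Linear(4, 4)
calls = CallCounter()
layer.register_forward_pre_hook(partial(counting_hook, counter=calls))
layer(torch.ones(1, 4))
layer(torch.ones(1, 4))
assert calls.count == 2  # the hook fires once per forward call
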
Example #10
    def test_number_of_calling_fq_for_lstm(self, tmp_path):
        p = LSTMTestSizes(1, 1, 1, 5)
        num_layers = 2
        bidirectional = True
        num_directions = 2 if bidirectional else 1
        bias = True
        batch_first = False
        patch_torch_operators()
        config = get_empty_config(input_sample_size=(p.seq_length, p.batch,
                                                     p.input_size))
        config['compression'] = {
            'algorithm': 'quantization',
            'quantize_inputs': True
        }

        config.log_dir = str(tmp_path)
        reset_context('orig')
        reset_context('quantized_graphs')
        test_data = TestLSTMCell.generate_lstm_data(p,
                                                    num_layers,
                                                    num_directions,
                                                    bias=bias,
                                                    batch_first=batch_first)

        test_rnn = NNCF_RNN('LSTM',
                            input_size=p.input_size,
                            hidden_size=p.hidden_size,
                            num_layers=num_layers,
                            bidirectional=bidirectional,
                            bias=bias,
                            batch_first=batch_first)
        TestLSTM.set_ref_lstm_weights(test_data, test_rnn, num_layers,
                                      num_directions, bias)
        test_hidden = TestLSTM.get_test_lstm_hidden(test_data)

        _ = reset_context('orig')
        _ = reset_context('quantized_graphs')
        _, model = create_compressed_model(test_rnn, config)

        class Counter:
            def __init__(self):
                self.count = 0

            def next(self):
                self.count += 1

        def hook(model, input_, counter):
            counter.next()

        counters = {}
        for name, quantizer in model.all_quantizations.items():
            counter = Counter()
            counters[name] = counter
            quantizer.register_forward_pre_hook(partial(hook, counter=counter))
        with context('quantized_graphs') as ctx:
            _ = model(test_data.x, test_hidden)
            assert ctx.graph.get_nodes_count() == 110
            ctx.graph.dump_graph(
                os.path.join(config.log_dir, "compressed_graph_next.dot"))
        assert len(counters) == 54
        for counter in counters.values():
            assert counter.count == p.seq_length
Example #11
def main_worker_binarization(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))

    original_model = copy.deepcopy(model)
    compression_algo, model = create_compressed_model(model, config)
    if not isinstance(compression_algo, Binarization):
        raise RuntimeError(
            "The binarization sample worker may only be run with the binarization algorithm!"
        )

    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_algo.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    binarization_config = compression_config if isinstance(
        compression_config, dict) else compression_config[0]
    optimizer = get_binarization_optimizer(params_to_optimize,
                                           binarization_config)
    optimizer_scheduler = BinarizationOptimizerScheduler(
        optimizer, binarization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_algo, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model,
                                   config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_algo)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_loader, train_sampler, val_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        print_statistics(compression_algo.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)

        batch_multiplier = (binarization_config.get("params", {})).get(
            "batch_multiplier", 1)
        train_bin(config, compression_algo, model, criterion, is_inception,
                  optimizer_scheduler, model_name, optimizer, train_loader,
                  train_sampler, val_loader, kd_loss_calculator,
                  batch_multiplier, best_acc1)