Example #1
    def test_ad_hoc_range_init_does_not_replace_parameter_tensors(
            self, wrap_dataloader, quant_type):
        config = create_config()
        config["compression"].update({
            "activations": {
                "mode": quant_type
            },
            "weights": {
                "mode": quant_type
            }
        })

        data_loader = self.create_dataloader(wrap_dataloader, config)
        config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])

        model = TwoConvTestModel()
        quant_model, quant_ctrl = create_compressed_model_and_algo_for_test(
            model, config)
        param_name_vs_id = {
            name: id(tnsr)
            for name, tnsr in quant_model.named_parameters()
        }

        quant_ctrl.init_range()

        for name, param in quant_model.named_parameters():
            assert param_name_vs_id[name] == id(param)
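This test pins down an important invariant: init_range() must update quantizer parameters in place rather than rebind them, because any optimizer built beforehand keeps references to the original tensors. A minimal PyTorch-only sketch of the distinction (not NNCF code):

import torch

layer = torch.nn.Linear(4, 4)
opt = torch.optim.SGD(layer.parameters(), lr=0.1)
weight_id = id(layer.weight)

with torch.no_grad():
    layer.weight.copy_(torch.zeros(4, 4))  # in-place write: identity preserved
assert id(layer.weight) == weight_id
assert opt.param_groups[0]['params'][0] is layer.weight

layer.weight = torch.nn.Parameter(torch.ones(4, 4))  # rebinding: new tensor
assert id(layer.weight) != weight_id
assert opt.param_groups[0]['params'][0] is not layer.weight  # optimizer now stale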
Example #2
    def test_scale_and_sign_init_for_quant_algo__after_load_state(
            self, wrap_dataloader):
        config = create_config()
        data_loader = self.create_dataloader(wrap_dataloader, config)
        config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
        _, compressed_model = self.create_algo_and_compressed_model(config)
        ref_loaded_scale_val = torch.ones((1, 1, 1, 1)) * 100
        load_state(
            compressed_model,
            {
                'module.features.0.0.pre_ops.0.op.signed_tensor':
                torch.tensor([0.]),  # quantizer of 1st conv's weights
                'module.features.1.0.pre_ops.0.op.scale':
                ref_loaded_scale_val  # quantizer of 2nd conv's weights
            })

        self.check_sign_and_scale(
            compressed_model, {
                '.*Sequential\\[0\\].*UpdateWeight.*':
                (False, torch.ones(2, 1, 1, 1)),
                '.*Sequential\\[1\\].*UpdateWeight.*':
                (True, ref_loaded_scale_val),
                '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
                '.*activation_quantizers.*nncf_model_input*': (False, 1)
            })
Example #3
def register_default_init_args(
        nncf_config: 'NNCFConfig',
        train_loader: torch.utils.data.DataLoader,
        criterion: _Loss = None,
        criterion_fn: Callable[[Any, Any, _Loss], torch.Tensor] = None,
        autoq_eval_fn: Callable[[torch.nn.Module, torch.utils.data.DataLoader],
                                float] = None,
        autoq_eval_loader: torch.utils.data.DataLoader = None,
        device: str = None) -> 'NNCFConfig':

    nncf_config.register_extra_structs([
        QuantizationRangeInitArgs(data_loader=train_loader, device=device),
        BNAdaptationInitArgs(data_loader=train_loader, device=device)
    ])

    if criterion:
        if not criterion_fn:
            criterion_fn = default_criterion_fn
        nncf_config.register_extra_structs([
            QuantizationPrecisionInitArgs(criterion_fn=criterion_fn,
                                          criterion=criterion,
                                          data_loader=train_loader,
                                          device=device)
        ])

    if autoq_eval_fn:
        if not autoq_eval_loader:
            autoq_eval_loader = train_loader
        nncf_config.register_extra_structs([
            AutoQPrecisionInitArgs(data_loader=autoq_eval_loader,
                                   eval_fn=autoq_eval_fn,
                                   nncf_config=nncf_config)
        ])

    return nncf_config
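A hedged usage sketch for the helper above; the config contents and import path follow the other examples in this listing, but exact module paths vary across NNCF versions:

import torch
from torch.utils.data import DataLoader, TensorDataset
from nncf import NNCFConfig

nncf_config = NNCFConfig()
nncf_config.update({
    "input_info": {"sample_size": [1, 3, 32, 32]},
    "compression": {"algorithm": "quantization"}
})
train_loader = DataLoader(
    TensorDataset(torch.randn(8, 3, 32, 32), torch.randint(0, 10, (8,))),
    batch_size=4)
# Registers range init and BN adaptation args; passing a criterion also
# registers precision init args, as the branches above show.
nncf_config = register_default_init_args(nncf_config, train_loader,
                                         criterion=torch.nn.CrossEntropyLoss())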
Example #4
def register_default_init_args(nncf_config: 'NNCFConfig', criterion,
                               train_loader) -> 'NNCFConfig':
    nncf_config.register_extra_structs([
        QuantizationPrecisionInitArgs(criterion=criterion,
                                      data_loader=train_loader),
        QuantizationRangeInitArgs(data_loader=train_loader)
    ])
    return nncf_config
Example #5
    def test_per_layer_range_init_with_correct_possible_config(
            self, wrap_dataloader, per_layer_range_init_test_struct):
        config = create_config()
        config['compression']['initializer']['range'] = \
            per_layer_range_init_test_struct.range_init_config
        data_loader = self.create_dataloader(wrap_dataloader, config)
        config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
        ctrl, _ = self.create_algo_and_compressed_model(config)
        expected = per_layer_range_init_test_struct.expected_modules_to_init
        for str_scope, range_init_config in expected.items():
            assert ctrl.modules_to_range_init[str_scope][1] == range_init_config
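For context, one plausible shape of range_init_config for the per-layer case; the key names here (target_scopes, ignored_scopes, num_init_samples) are assumptions based on the NNCF config schema and may differ between versions:

per_layer_range_init_config = [
    {
        "type": "min_max",
        "num_init_samples": 10,
        "target_scopes": ["{re}.*Sequential\\[0\\].*"]
    },
    {
        "type": "mean_min_max",
        "num_init_samples": 10,
        "ignored_scopes": ["{re}.*Sequential\\[0\\].*"]
    }
]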
Example #6
    def test_scale_and_sign_init_for_quant_algo__without_init_section(
            self, wrap_dataloader, config_creator):
        config = config_creator()
        data_loader = self.create_dataloader(wrap_dataloader, config)
        config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
        _, compressed_model = self.create_algo_and_compressed_model(config)

        self.check_sign_and_scale(
            compressed_model, {
                '.*Sequential\\[0\\].*UpdateWeight.*': (True, 1),
                '.*Sequential\\[1\\].*UpdateWeight.*': (False, 1),
                '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
                '.*activation_quantizers.*Sequential\\[1\\].*': (True, 24)
            })
Example #7
def register_default_init_args(nncf_config: 'NNCFConfig',
                               train_loader,
                               criterion: _Loss = None,
                               criterion_fn: Callable[[Any, Any, _Loss],
                                                      torch.Tensor] = None,
                               device='cuda') -> 'NNCFConfig':
    if criterion:
        if not criterion_fn:
            criterion_fn = default_criterion_fn
        nncf_config.register_extra_structs([
            QuantizationPrecisionInitArgs(criterion_fn=criterion_fn,
                                          criterion=criterion,
                                          data_loader=train_loader,
                                          device=device),
            QuantizationRangeInitArgs(data_loader=train_loader, device=device),
            BNAdaptationInitArgs(data_loader=train_loader, device=device)
        ])
    else:
        nncf_config.register_extra_structs([
            QuantizationRangeInitArgs(data_loader=train_loader, device=device),
            BNAdaptationInitArgs(data_loader=train_loader, device=device)
        ])
    return nncf_config
Example #8
    def test_scope_overrides(self, wrap_dataloader):
        config = create_config()
        config['target_device'] = 'NONE'
        config["compression"]["scope_overrides"] = {
            r"{re}NNCFConv2d\[[0-9]*\]$": {
                "bits": 7,
                "mode": "asymmetric",
            },
            "/nncf_model_input_0": {
                "bits": 7,
                "mode": "asymmetric",
            },
            r"{re}NNCFConv2d\[[0-9]*\]/conv2d_0": {
                "bits": 7,
                "signed": False,
            }
        }
        data_loader = self.create_dataloader(wrap_dataloader, config)
        config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
        _, compressed_model = self.create_algo_and_compressed_model(config)

        quantizers = get_all_modules_by_type(
            compressed_model, ['SymmetricQuantizer', 'AsymmetricQuantizer'])
        quantizer_str_dict = {str(k): v for k, v in quantizers.items()}
        group_1 = [
            quantizer_str_dict[
                "NNCFNetwork/TwoConvTestModel[nncf_module]/Sequential[features]/"
                "Sequential[0]/NNCFConv2d[0]/ModuleDict[pre_ops]/UpdateWeight[0]/"
                "AsymmetricQuantizer[op]"],
            quantizer_str_dict[
                "NNCFNetwork/ModuleDict[activation_quantizers]/AsymmetricQuantizer"
                "[/nncf_model_input_0]"],
            quantizer_str_dict[
                "NNCFNetwork/TwoConvTestModel[nncf_module]/Sequential[features]/"
                "Sequential[1]/NNCFConv2d[0]/ModuleDict[pre_ops]/UpdateWeight[0]/"
                "AsymmetricQuantizer[op]"]
        ]
        group_2 = [
            quantizer_str_dict[
                "NNCFNetwork/ModuleDict[activation_quantizers]/"
                "SymmetricQuantizer[TwoConvTestModel/Sequential[features]"
                "/Sequential[0]/NNCFConv2d[0]/conv2d_0]"]
        ]

        for quantizer in group_1:
            assert isinstance(quantizer, AsymmetricQuantizer)
            assert quantizer.levels == 2**7
        for quantizer in group_2:
            assert isinstance(quantizer, SymmetricQuantizer)
            assert not quantizer.signed
Example #9
    def test_scale_and_sign_init_for_quant_algo__with_zero_init_steps(
            self, wrap_dataloader):
        config = create_config()
        config['compression']['initializer']['range']['num_init_steps'] = 0

        data_loader = self.create_dataloader(wrap_dataloader, config)
        config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
        _, compressed_model = self.create_algo_and_compressed_model(config)

        self.check_sign_and_scale(
            compressed_model, {
                '.*Sequential\\[0\\].*UpdateWeight.*': (False, 1),
                '.*Sequential\\[1\\].*UpdateWeight.*': (False, 1),
                '.*activation_quantizers.*Sequential\\[0\\].*': (False, 1),
                '.*activation_quantizers.*Sequential\\[1\\].*': (False, 1)
            })
Example #10
def scale_signed_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    distributed_init_test_default(gpu, ngpus_per_node, config)
    data_loader = create_rank_dataloader(config, gpu)
    model = safe_thread_call(partial(squeezenet1_1, pretrained=True))

    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    quant_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    compression_scheduler = compression_ctrl.scheduler

    quant_model = post_compression_test_distr_init(compression_ctrl, config,
                                                   ngpus_per_node, quant_model)

    criterion = torch.nn.MSELoss().cuda(config.gpu)
    optimizer = torch.optim.Adam(quant_model.parameters(), lr=0.01)

    torch.backends.cudnn.benchmark = True

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    act_sum = 0
    for layer in get_all_modules_by_type(quant_model,
                                         "SymmetricQuantizer").values():
        act_sum += layer.scale.sum()
    ref_sum = 4447.291
    assert act_sum.item() == approx(ref_sum, 0.01), \
        'sum of scales is not expected {} vs {} rank {}'.format(act_sum.item(), ref_sum, config.rank)

    out_file_path = get_path_after_broadcast(tmp_path, config.rank)
    save_params(quant_model, out_file_path)
    compression_scheduler.step()
    for i, (input_, _) in enumerate(data_loader):
        if i > 5:
            break
        output = quant_model(input_)
        optimizer.zero_grad()
        dummy_target = torch.randn(1000).cuda(config.gpu, non_blocking=True)
        loss = criterion(output, dummy_target)
        compression_scheduler.step()
        loss.backward()
        optimizer.step()
        compression_scheduler.step()

    out_file_path = get_path_path_after_train_iters(tmp_path, config.rank)
    save_params(quant_model, out_file_path)
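Workers like this one are typically launched once per GPU with torch.multiprocessing.spawn, which passes the process index as the first argument (treated as the GPU id above). A self-contained launch sketch with a no-op stand-in worker:

import torch.multiprocessing as mp

def _noop_worker(gpu, ngpus_per_node, config, tmp_path):
    # Stand-in for scale_signed_dumping_worker.
    pass

if __name__ == '__main__':
    ngpus = 2  # assumed GPU count for illustration
    mp.spawn(_noop_worker, nprocs=ngpus, args=(ngpus, None, None))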
Example #11
def test_staged_scheduler_with_range_init():
    config = get_squeezenet_quantization_config()
    config['compression'].update({
        'params': {
            "activations_quant_start_epoch": 1,
            "weights_quant_start_epoch": 2,
        },
        'initializer': {
            'range': {
                'num_init_samples': 1
            }
        }
    })
    model = squeezenet1_1(num_classes=10, dropout=0)

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = DataLoader(
        OnesDatasetMock(input_sample_size[1:]),
        batch_size=1,
        num_workers=0,  # Workaround for PyTorch MultiprocessingDataLoader issues
        shuffle=False)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])

    model, algo = create_compressed_model_and_algo_for_test(model, config)
    scheduler = algo.scheduler

    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        if module.is_weights:
            assert not module.is_enabled_quantization()
        else:
            assert module.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert module.is_enabled_quantization()
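OnesDatasetMock is an NNCF test helper; a hypothetical minimal reconstruction consistent with its usage above (constant inputs make the expected quantizer ranges deterministic):

import torch
from torch.utils.data import Dataset

class OnesDatasetMock(Dataset):
    def __init__(self, input_size, num_samples=10):
        self._input_size = input_size
        self._len = num_samples

    def __getitem__(self, index):
        # Range init only consumes the inputs; the target is a dummy.
        return torch.ones(self._input_size), torch.zeros(1)

    def __len__(self):
        return self._len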
Example #12
def test_per_layer_range_init_is_called_the_required_number_of_times(
        range_init_call_count_test_struct, mocker):
    config = create_config()
    config['compression']['initializer'][
        'range'] = range_init_call_count_test_struct.range_init_config
    data_loader = TestRangeInit.create_dataloader(False, config, 10)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])

    range_minmax_init_create_spy = mocker.spy(
        nncf.quantization.init_range.MinMaxInitializer, '__init__')
    range_meanminmax_init_create_spy = mocker.spy(
        nncf.quantization.init_range.MeanMinMaxInitializer, '__init__')
    range_threesigma_init_create_spy = mocker.spy(
        nncf.quantization.init_range.ThreeSigmaInitializer, '__init__')

    range_minmax_init_register_input_spy = mocker.spy(
        nncf.quantization.init_range.MinMaxInitializer, 'register_input')
    range_meanminmax_init_register_input_spy = mocker.spy(
        nncf.quantization.init_range.MeanMinMaxInitializer, 'register_input')
    range_threesigma_init_register_input_spy = mocker.spy(
        nncf.quantization.init_range.ThreeSigmaInitializer, 'register_input')

    TestRangeInit.create_algo_and_compressed_model(config)

    expected_create_count = \
        range_init_call_count_test_struct.expected_call_count_initializer_create
    expected_register_count = \
        range_init_call_count_test_struct.expected_call_count_register_input

    assert range_minmax_init_create_spy.call_count == \
        expected_create_count['min_max']
    assert range_meanminmax_init_create_spy.call_count == \
        expected_create_count['mean_min_max']
    assert range_threesigma_init_create_spy.call_count == \
        expected_create_count['three_sigma']

    assert range_minmax_init_register_input_spy.call_count == \
        expected_register_count['min_max']
    assert range_meanminmax_init_register_input_spy.call_count == \
        expected_register_count['mean_min_max']
    assert range_threesigma_init_register_input_spy.call_count == \
        expected_register_count['three_sigma']
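mocker.spy comes from pytest-mock: it wraps the real callable, so behavior is unchanged while calls are recorded. A self-contained illustration of the pattern:

import math

def test_spy_records_calls_without_changing_behavior(mocker):
    spy = mocker.spy(math, 'sqrt')
    assert math.sqrt(9) == 3.0  # the real implementation still runs
    assert spy.call_count == 1
    spy.assert_called_once_with(9)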
Example #13
def test_percentile_init(quantization_mode):
    class SyntheticDataset(torch.utils.data.Dataset):
        def __init__(self):
            self._length = 1

        def __getitem__(self, idx):
            if idx >= self._length:
                # Map-style datasets signal end-of-data with IndexError,
                # not StopIteration (PEP 479)
                raise IndexError
            test_input_sample = torch.zeros([1, 100, 100])
            for i in range(0, 100):
                for j in range(0, 100):
                    test_input_sample[0][i][j] = i * 100 + j
            return test_input_sample, test_input_sample

        def __len__(self):
            return self._length

    data_loader = torch.utils.data.DataLoader(SyntheticDataset(), batch_size=1)

    config_with_init = NNCFConfig()
    config_with_init.update({
        "input_info": {
            "sample_size": [1, 1, 100, 100]
        },
        "compression": {
            "algorithm": "quantization",
            "activations": {
                "mode": quantization_mode,
            },
            "weights": {
                "mode": quantization_mode,
            },
            "initializer": {
                "range": {
                    "num_init_steps": 1,
                    "type": "percentile",
                    "min_percentile": 32.10,
                    "max_percentile": 67.89
                }
            }
        }
    })

    # Activations init check
    id_model = SingleConv2dIdentityModel()
    config_with_init.register_extra_structs(
        [QuantizationRangeInitArgs(data_loader)])
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        id_model, config_with_init)

    act_quantizer = next(iter(compression_ctrl.non_weight_quantizers.values()))

    def assert_range(quantizer: BaseQuantizer):
        # Absolute tolerance is 1.0 due to percentile value interpolation
        if quantization_mode == 'symmetric':
            assert quantizer.scale.item() == approx(6789, abs=1.0)
        else:
            assert quantizer.input_low.item() == approx(3210, abs=1.0)
            assert quantizer.input_range.item() == approx(3578, abs=1.0)

    assert_range(act_quantizer)
    # Weight init check
    synth_weight_model = SingleConv2dSyntheticWeightModel()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        synth_weight_model, config_with_init)

    weight_quantizer = next(
        iter(compression_ctrl.non_weight_quantizers.values()))
    assert_range(weight_quantizer)
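The constants in assert_range follow from the synthetic input, a uniform ramp over 0..9999: its 32.10th and 67.89th percentiles sit near 3210 and 6789, and their difference near 3578. A standalone sanity check of the arithmetic:

import torch

values = torch.arange(10000, dtype=torch.float32)
low = torch.quantile(values, 0.3210)
high = torch.quantile(values, 0.6789)
print(low.item(), high.item(), (high - low).item())  # ~3209.7, ~6788.3, ~3578.6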
Example #14
def scale_signed_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    config.batch_size = 3
    config.workers = 3
    config.gpu = gpu
    config.ngpus_per_node = ngpus_per_node
    config.rank = gpu
    config.distributed = True

    torch.distributed.init_process_group(backend="nccl",
                                         init_method='tcp://127.0.0.1:8899',
                                         world_size=config.world_size,
                                         rank=config.rank)

    model = safe_thread_call(partial(squeezenet1_1_custom, pretrained=True))

    input_infos_list = create_input_infos(config)
    input_sample_size = input_infos_list[0].shape
    data_loader = torch.utils.data.DataLoader(RankDatasetMock(
        input_sample_size[1:], config.rank),
                                              batch_size=3,
                                              num_workers=1,
                                              shuffle=False)
    config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
    quant_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    compression_scheduler = compression_ctrl.scheduler

    torch.cuda.set_device(config.gpu)
    quant_model.cuda(config.gpu)
    config.batch_size = int(config.batch_size / ngpus_per_node)
    config.workers = int(config.workers / ngpus_per_node)
    quant_model = torch.nn.parallel.DistributedDataParallel(
        quant_model, device_ids=[config.gpu])

    compression_ctrl.distributed()

    criterion = torch.nn.MSELoss().cuda(config.gpu)
    optimizer = torch.optim.Adam(quant_model.parameters(), lr=0.01)

    torch.backends.cudnn.benchmark = True

    # just to reproduce the same scale values without Dropout
    quant_model.eval()

    act_sum = 0
    for layer in get_all_modules_by_type(quant_model,
                                         "SymmetricQuantizer").values():
        act_sum += layer.scale
    ref_sum = 3467.322
    assert act_sum.item() == approx(ref_sum, 0.01), \
        'sum of scales is not expected {} vs {} rank {}'.format(act_sum.item(), ref_sum, config.rank)

    out_file_path = get_path_after_broadcast(tmp_path, config.rank)
    save_params(quant_model, out_file_path)
    compression_scheduler.step()
    for i, (input_, _) in enumerate(data_loader):
        if i > 5:
            break
        output = quant_model(input_)
        optimizer.zero_grad()
        dummy_target = torch.randn(1000).cuda(config.gpu, non_blocking=True)
        loss = criterion(output, dummy_target)
        compression_scheduler.step()
        loss.backward()
        optimizer.step()
        compression_scheduler.step()

    out_file_path = get_path_path_after_train_iters(tmp_path, config.rank)
    save_params(quant_model, out_file_path)
Example #15
def test_percentile_init(quantization_mode: str, per_channel: bool):
    class SyntheticDataset(torch.utils.data.Dataset):
        def __init__(self):
            self._length = 1

        def __getitem__(self, idx):
            if idx >= self._length:
                # Map-style datasets signal end-of-data with IndexError,
                # not StopIteration (PEP 479)
                raise IndexError
            test_input_sample = torch.zeros([3, 100, 100])
            for i in range(0, 100):
                for j in range(0, 100):
                    test_input_sample[0][i][j] = i * 100 + j
            test_input_sample[1] = test_input_sample[0]
            test_input_sample[2] = test_input_sample[0]
            return test_input_sample, test_input_sample

        def __len__(self):
            return self._length

    data_loader = torch.utils.data.DataLoader(SyntheticDataset(), batch_size=1)

    config_with_init = NNCFConfig()
    config_with_init.update({
        "input_info": {
            "sample_size": [1, 3, 100, 100]
        },
        "target_device": "NONE",
        "compression": {
            "algorithm": "quantization",
            "activations": {
                "mode": quantization_mode,
                "per_channel": per_channel
            },
            "weights": {
                "mode": quantization_mode,
                "per_channel": per_channel
            },
            "initializer": {
                "range": {
                    "num_init_steps": 1,
                    "type": "percentile",
                    "min_percentile": 32.10,
                    "max_percentile": 67.89
                }
            }
        }
    })

    # Activations init check
    id_model = SingleConv2dIdentityModel()
    config_with_init.register_extra_structs(
        [QuantizationRangeInitArgs(data_loader)])
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        id_model, config_with_init)

    act_quantizer_info = next(
        iter(compression_ctrl.non_weight_quantizers.values()))

    def check_scales(quantizer: BaseQuantizer, per_channel: bool):
        # Absolute tolerance is 1.0 due to percentile value interpolation
        if quantization_mode == 'symmetric':
            assert torch.allclose(quantizer.scale,
                                  torch.ones_like(quantizer.scale) * 6789,
                                  atol=1.0)
            if per_channel:
                assert quantizer.scale.numel() == 3
            else:
                assert quantizer.scale.numel() == 1
        else:
            assert torch.allclose(quantizer.input_low,
                                  torch.ones_like(quantizer.input_low) * 3210,
                                  atol=1.0)
            assert torch.allclose(quantizer.input_range,
                                  torch.ones_like(quantizer.input_low) * 3578,
                                  atol=1.0)
            if per_channel:
                assert quantizer.input_low.numel() == 3
                assert quantizer.input_range.numel() == 3
            else:
                assert quantizer.input_low.numel() == 1
                assert quantizer.input_range.numel() == 1

    check_scales(act_quantizer_info.quantizer_module_ref, per_channel)
    # Weight init check
    synth_weight_model = SingleConv2dSyntheticWeightModel()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        synth_weight_model, config_with_init)

    weight_quantizer = next(iter(compression_ctrl.weight_quantizers.values()))
    check_scales(weight_quantizer, per_channel)
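SingleConv2dIdentityModel is another NNCF test helper; a hypothetical reconstruction consistent with its role here, where a 1x1 identity convolution passes the input through unchanged so the activation quantizer observes exactly the synthetic distribution:

import torch
import torch.nn as nn

class SingleConv2dIdentityModel(nn.Module):
    def __init__(self, channels=3):
        super().__init__()
        self.conv = nn.Conv2d(channels, channels, kernel_size=1, bias=False)
        with torch.no_grad():
            # Identity kernel: output channel i copies input channel i.
            self.conv.weight.copy_(
                torch.eye(channels).reshape(channels, channels, 1, 1))

    def forward(self, x):
        return self.conv(x)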