def test_load_state__with_resume_checkpoint(_resume_algos, _model_wrapper,
                                            mocker):
    config_save = get_empty_config()
    config_save['compression'] = [{
        'algorithm': algo
    } for algo in _resume_algos['save_algos'] if algo != 'EMPTY']
    register_bn_adaptation_init_args(config_save)
    orig_model = BasicConvTestModel()
    num_model_params = len(orig_model.state_dict())
    model_save, compressed_ctrl_save = create_compressed_model_and_algo_for_test(
        orig_model, config_save)
    saved_model_state = model_save.state_dict()
    saved_checkpoint = compressed_ctrl_save.get_compression_state()
    ref_num_loaded = _resume_algos[
        'ref_num_compression_params'] + num_model_params + 1  # padding_value

    config_resume = get_empty_config()
    config_resume['compression'] = [{
        'algorithm': algo
    } for algo in _resume_algos['load_algos'] if algo != 'EMPTY']
    register_bn_adaptation_init_args(config_resume)
    from nncf.torch.checkpoint_loading import KeyMatcher
    key_matcher_run_spy = mocker.spy(KeyMatcher, 'run')
    model, _ = create_compressed_model_and_algo_for_test(
        BasicConvTestModel(),
        config_resume,
        compression_state=saved_checkpoint)
    load_state(model, saved_model_state, _resume_algos['is_strict'])
    key_matcher_run_spy.assert_called_once()
    act_num_loaded = len(key_matcher_run_spy.spy_return)
    assert act_num_loaded == ref_num_loaded
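The round trip exercised above, reduced to its essentials. This is a sketch, not part of the original test: MyModel and nncf_config are placeholders, while the NNCF calls mirror the ones used in the test itself.

model, ctrl = create_compressed_model_and_algo_for_test(MyModel(), nncf_config)
checkpoint = {
    'model_state_dict': model.state_dict(),
    'compression_state': ctrl.get_compression_state(),
}
# ... later, to resume from the checkpoint:
model, ctrl = create_compressed_model_and_algo_for_test(
    MyModel(), nncf_config, compression_state=checkpoint['compression_state'])
load_state(model, checkpoint['model_state_dict'], is_resume=True)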
Example 2
def test_can_export_compressed_model_with_input_output_names(tmp_path):
    test_path = str(tmp_path.joinpath('test.onnx'))
    target_input_names = ['input1', 'input2']
    target_output_names = ['output1', 'output2']

    model = BasicTestModelWithTwoInputOutput()
    config = get_basic_asym_quantization_config()

    config["input_info"] = [{'sample_size': [1, 1, 4, 4]}, {'sample_size': [1, 1, 4, 4]}]
    register_bn_adaptation_init_args(config)

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    compression_ctrl.export_model(test_path, input_names=target_input_names,
                                  output_names=target_output_names)

    assert os.path.exists(test_path)

    onnx_model = onnx.load(test_path)
    # pylint: disable=no-member
    curr_input_names = [node.name for node in onnx_model.graph.input]
    curr_output_names = [node.name for node in onnx_model.graph.output]

    assert curr_input_names == target_input_names
    assert curr_output_names == target_output_names
Example 3
def test_quantize_outputs_with_scope_overrides():
    config = get_quantization_config_without_range_init()
    config["input_info"] = [{
        "sample_size": [2, 3, 32, 32],
    }]
    model = QuantizeOutputsTestModel()
    config['compression']['quantize_outputs'] = True
    config['target_device'] = "TRIAL"
    config['compression']['scope_overrides'] = {
        "activations": {
            "/nncf_model_output_0": {
                "bits": 4,
                "mode": "asymmetric",
            }
        }
    }
    register_bn_adaptation_init_args(config)
    model, ctrl = create_compressed_model_and_algo_for_test(model, config)
    output_quantizers =\
        [q for qid, q in ctrl.all_quantizations.items() if isinstance(qid, NonWeightQuantizerId)]
    for q in output_quantizers[1:]:
        assert q.num_bits == 8
        assert isinstance(q, SymmetricQuantizer)

    assert output_quantizers[0].num_bits == 4
    assert isinstance(output_quantizers[0], AsymmetricQuantizer)
Example 4
def test_staged_scheduler_with_empty_quantization():
    config = get_squeezenet_quantization_config()
    config['compression'].update({
        'params': {
            "activations_quant_start_epoch": 1,
            "weights_quant_start_epoch": 2,
        }
    })
    register_bn_adaptation_init_args(config)
    model = squeezenet1_1(num_classes=10, dropout=0)

    model, algo = create_compressed_model_and_algo_for_test(model, config)
    scheduler = algo.scheduler
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert not module.is_enabled_quantization()
    scheduler.epoch_step()
    for wq_info in algo.weight_quantizers.values():
        assert not wq_info.quantizer_module_ref.is_enabled_quantization()
    for aq_info in algo.non_weight_quantizers.values():
        assert aq_info.quantizer_module_ref.is_enabled_quantization()

    scheduler.epoch_step()
    for module in algo.all_quantizations.values():
        assert module.is_enabled_quantization()
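In a real training run the scheduler is driven the same way; the epoch_step() calls are what enable the activation and weight quantizers at their configured start epochs. A minimal sketch, assuming num_epochs, train_loader, optimizer and a train_one_batch helper exist:

for epoch in range(num_epochs):
    scheduler.epoch_step()  # flips quantizers on at their start epochs
    for batch in train_loader:
        scheduler.step()    # per-iteration scheduler hook
        train_one_batch(model, batch, optimizer)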
Example 5
def test_multiprocessing_distributed_shares_init_scales_signedness_across_gpus(
        tmp_path, runs_subprocess_in_precommit):
    if not torch.cuda.is_available():
        pytest.skip("Skipping CUDA test cases for CPU only setups")
    num_init_samples = 10

    config = get_squeezenet_quantization_config()
    config['compression']['initializer'] = {
        'range': {
            'num_init_samples': num_init_samples
        }
    }

    ngpus_per_node = torch.cuda.device_count()
    config.world_size = ngpus_per_node
    register_bn_adaptation_init_args(config)
    torch.multiprocessing.spawn(scale_signed_dumping_worker,
                                nprocs=ngpus_per_node,
                                args=(ngpus_per_node, config, tmp_path),
                                join=True)

    assert not compare_multi_gpu_dump(config, tmp_path,
                                      get_path_after_broadcast)
    assert not compare_multi_gpu_dump(config, tmp_path,
                                      get_path_path_after_train_iters)
Example 6
def test_can_quantize_inputs_for_sparsity_plus_quantization():
    model = BasicConvTestModel()
    config = get_basic_sparsity_plus_quantization_config()
    register_bn_adaptation_init_args(config)
    sparse_quantized_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    assert isinstance(compression_ctrl,
                      CompositeCompressionAlgorithmController)

    sparse_quantized_model_conv = get_all_modules_by_type(
        sparse_quantized_model, 'NNCFConv2d')

    nncf_module = next(iter(sparse_quantized_model_conv.values()))
    assert len(
        nncf_module.pre_ops) == 2  # 1x weight sparsifier + 1x weight quantizer
    assert isinstance(nncf_module.pre_ops['0'], UpdateWeight)
    assert isinstance(nncf_module.pre_ops['0'].op, RBSparsifyingWeight)

    assert isinstance(nncf_module.pre_ops['1'], UpdateWeight)
    assert isinstance(nncf_module.pre_ops['1'].op, SymmetricQuantizer)

    input_quantizer = get_all_modules(sparse_quantized_model)[
        f'NNCFNetwork/ModuleDict[{EXTERNAL_QUANTIZERS_STORAGE_NAME}]']

    assert len(input_quantizer) == 1
    assert isinstance(list(input_quantizer.values())[0], SymmetricQuantizer)
Example 7
def test_can_export_compressed_model_with_specified_domain_for_custom_ops(tmp_path):
    test_path = str(tmp_path.joinpath('test.onnx'))

    model = BasicTestModelWithTwoInputOutput()
    config = get_basic_asym_quantization_config()

    config["input_info"] = [{'sample_size': [1, 1, 4, 4]}, {'sample_size': [1, 1, 4, 4]}]
    register_bn_adaptation_init_args(config)

    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    compression_ctrl.export_model(test_path)

    assert os.path.exists(test_path)

    onnx_model = onnx.load(test_path)

    count_custom_ops = 0
    # pylint: disable=no-member
    for op_node in onnx_model.graph.node:
        if op_node.op_type == "FakeQuantize":
            assert op_node.domain == DOMAIN_CUSTOM_OPS_NAME
            count_custom_ops += 1

    assert count_custom_ops == 4
Example 8
def test_quantization_configs__with_defaults():
    model = BasicConvTestModel()
    config = get_quantization_config_without_range_init()
    register_bn_adaptation_init_args(config)
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)

    assert isinstance(compression_ctrl, QuantizationController)
    weight_quantizers = compression_ctrl.weight_quantizers
    activation_quantizer_infos = compression_ctrl.non_weight_quantizers

    ref_weight_qspec = PTQuantizerSpec(
        num_bits=8,
        mode=QuantizationMode.SYMMETRIC,
        signedness_to_force=True,
        narrow_range=True,
        half_range=False,
        scale_shape=model.wq_scale_shape_per_channel,
        logarithm_scale=False)
    for wq_info in weight_quantizers.values():
        compare_qspecs(ref_weight_qspec, wq_info.quantizer_module_ref)

    ref_activation_qspec = PTQuantizerSpec(num_bits=8,
                                           mode=QuantizationMode.SYMMETRIC,
                                           signedness_to_force=None,
                                           narrow_range=False,
                                           half_range=False,
                                           scale_shape=(1, ),
                                           logarithm_scale=False)
    for aq_info in activation_quantizer_infos.values():
        compare_qspecs(ref_activation_qspec, aq_info.quantizer_module_ref)
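compare_qspecs is defined elsewhere in the test suite. A minimal hypothetical stand-in that checks the spec fields used above against the live quantizer module might look like the following; the quantizer attribute names here are assumptions, not NNCF's confirmed API.

def compare_qspecs_sketch(ref_qspec: PTQuantizerSpec, quantizer_module):
    # Hypothetical field-by-field check; the real helper and the real
    # quantizer modules may expose these values differently.
    assert quantizer_module.num_bits == ref_qspec.num_bits
    assert tuple(quantizer_module.scale_shape) == tuple(ref_qspec.scale_shape)
    assert quantizer_module.narrow_range == ref_qspec.narrow_range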
Example 9
def test_quantization_preset_with_scope_overrides():
    model = QuantizeOutputsTestModel()
    config = get_empty_config(input_sample_sizes=[2, 3, 32, 32])
    config['target_device'] = "TRIAL"
    config['compression'] = {
        'algorithm': 'quantization',
        'preset': 'mixed',
        'scope_overrides': {
            'weights': {
                'QuantizeOutputsTestModel/NNCFConv2d[conv5]/conv2d_0': {
                    "mode": "asymmetric",
                }
            }
        }
    }
    register_bn_adaptation_init_args(config)
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)

    for wq_info in compression_ctrl.weight_quantizers.values():
        if wq_info.affected_insertions[0].target_node_name !=\
             'QuantizeOutputsTestModel/NNCFConv2d[conv5]/conv2d_0':
            assert isinstance(wq_info.quantizer_module_ref, SymmetricQuantizer)
        else:
            assert isinstance(wq_info.quantizer_module_ref,
                              AsymmetricQuantizer)

    for aq_info in compression_ctrl.non_weight_quantizers.values():
        assert isinstance(aq_info.quantizer_module_ref, AsymmetricQuantizer)
Example 10
def test_activation_quantizers_order_is_the_same__for_resnet50(
        tmp_path, runs_subprocess_in_precommit):
    if not torch.cuda.is_available():
        pytest.skip("Skipping CUDA test cases for CPU only setups")
    config = get_empty_config(input_sample_sizes=[1, 3, 224, 224])
    config['compression'] = {
        'algorithm': 'quantization',
        "initializer": {
            "range": {
                "num_init_samples": 0
            }
        }
    }
    register_bn_adaptation_init_args(config)
    ngpus_per_node = torch.cuda.device_count()

    torch.multiprocessing.spawn(activation_quantizers_dumping_worker,
                                nprocs=ngpus_per_node,
                                args=(config, tmp_path),
                                join=True)

    with open(get_path_to_keys(tmp_path, 0), 'r', encoding='utf8') as f:
        ref_list = f.readlines()
    for i in range(1, ngpus_per_node):
        with open(get_path_to_keys(tmp_path, i), 'r', encoding='utf8') as f:
            curr_list = f.readlines()
            assert curr_list == ref_list
Example 11
def test_quantize_outputs():
    config = get_quantization_config_without_range_init()
    config["input_info"] = [{
        "sample_size": [2, 3, 32, 32],
    }]
    model = QuantizeOutputsTestModel()
    config['compression']['quantize_outputs'] = True
    register_bn_adaptation_init_args(config)
    model, qctrl = create_compressed_model_and_algo_for_test(model, config)
    REF_QUANTIZED_OUTPUT_MODULE_SCOPES = [
        'QuantizeOutputsTestModel/NNCFConv2d[conv1]/conv2d_0|OUTPUT',
        'QuantizeOutputsTestModel/NNCFConv2d[conv2]/conv2d_0|OUTPUT',
        'QuantizeOutputsTestModel/NNCFConv2d[conv3]/conv2d_0|OUTPUT',
        'QuantizeOutputsTestModel/NNCFConv2d[conv4]/conv2d_0|OUTPUT'
    ]
    actual_output_quantizer_str_scopes =\
         [str(aq_id) for aq_id in qctrl.non_weight_quantizers if 'nncf_model_input' not in str(aq_id)]
    assert len(REF_QUANTIZED_OUTPUT_MODULE_SCOPES) == len(
        actual_output_quantizer_str_scopes)

    for ref_qinput_scope_str in REF_QUANTIZED_OUTPUT_MODULE_SCOPES:
        matches = []
        for aq_id in qctrl.non_weight_quantizers:
            if str(aq_id) == ref_qinput_scope_str:
                matches.append(aq_id)
        assert len(matches) == 1
        quantizer = qctrl.non_weight_quantizers[
            matches[0]].quantizer_module_ref
        assert isinstance(quantizer, SymmetricQuantizer)
Example 12
def test_export_stacked_bi_lstm(tmp_path):
    p = LSTMTestSizes(3, 3, 3, 3)
    config = get_empty_config(
        input_sample_sizes=[1, p.hidden_size, p.input_size])
    config['compression'] = {'algorithm': 'quantization'}
    register_bn_adaptation_init_args(config)

    # TODO: batch_first=True fails during graph building: ambiguous call to mul or sigmoid
    test_rnn = NNCF_RNN('LSTM',
                        input_size=p.input_size,
                        hidden_size=p.hidden_size,
                        num_layers=2,
                        bidirectional=True,
                        batch_first=False)
    model, algo = create_compressed_model_and_algo_for_test(test_rnn, config)

    test_path = str(tmp_path.joinpath('test.onnx'))
    algo.export_model(test_path)
    assert os.path.exists(test_path)

    onnx_num = 0
    model = onnx.load(test_path)
    # pylint: disable=no-member
    for node in model.graph.node:
        if node.op_type == 'FakeQuantize':
            onnx_num += 1
    assert onnx_num == 54
Example 13
def test_can_load_quant_algo__with_defaults():
    model = BasicConvTestModel()
    config = get_quantization_config_without_range_init()
    register_bn_adaptation_init_args(config)
    builder = create_compression_algorithm_builder(config)
    assert isinstance(builder, QuantizationBuilder)

    quant_model, _ = create_compressed_model_and_algo_for_test(
        deepcopy(model), config)

    model_conv = get_all_modules_by_type(model, 'Conv2d')
    quant_model_conv = get_all_modules_by_type(
        quant_model.get_nncf_wrapped_model(), 'NNCFConv2d')
    assert len(model_conv) == len(quant_model_conv)

    for module_scope, _ in model_conv.items():
        quant_scope = deepcopy(module_scope)  # type: Scope
        quant_scope.pop()
        quant_scope.push(ScopeElement('NNCFConv2d', 'conv'))
        assert quant_scope in quant_model_conv.keys()

        store = []
        for op in quant_model_conv[quant_scope].pre_ops.values():
            if isinstance(op, (UpdateInputs, UpdateWeight)) and isinstance(
                    op.operand, SymmetricQuantizer):
                assert op.__class__.__name__ not in store
                store.append(op.__class__.__name__)
        assert UpdateWeight.__name__ in store
Example 14
def test_load_state_interoperability(_algos, _model_wrapper, is_resume):
    config_save = get_empty_config()
    config_save['compression'] = [{
        'algorithm': algo
    } for algo in _algos['save_algos']]
    register_bn_adaptation_init_args(config_save)
    compressed_model_save, _ = create_compressed_model_and_algo_for_test(
        BasicConvTestModel(), config_save)
    model_save = _model_wrapper['save_model'](compressed_model_save)
    saved_model_state = model_save.state_dict()
    ref_num_loaded = len(saved_model_state)

    config_resume = get_empty_config()
    config_resume['compression'] = [{
        'algorithm': algo
    } for algo in _algos['load_algos']]
    register_bn_adaptation_init_args(config_resume)
    compressed_model_resume, _ = create_compressed_model_and_algo_for_test(
        BasicConvTestModel(), config_resume)
    model_resume = _model_wrapper['resume_model'](compressed_model_resume)

    if not is_resume or _algos['is_resume_ok']:
        act_num_loaded = load_state(model_resume, saved_model_state, is_resume)

        if ('magnitude_sparsity' in _algos['load_algos'] or 'const_sparsity' in _algos['load_algos']) \
            and 'rb_sparsity' in _algos['save_algos']:
            # no need to load _mask and _uniform
            ref_num_loaded -= 2
        assert act_num_loaded == ref_num_loaded
    else:
        with pytest.raises(RuntimeError):
            load_state(model_resume, saved_model_state, is_resume)
Example 15
def get_config_for_logarithm_scale(logarithm_scale: bool,
                                   quantization_type: str) -> NNCFConfig:
    nncf_config = NNCFConfig()
    nncf_config.update({
        "input_info": {
            "sample_size": SAMPLE_SIZE
        },
        "target_device": 'TRIAL',
        "compression": {
            "algorithm": "quantization",
            "initializer": {
                "range": {
                    "num_init_samples": 4,
                    "type": "percentile",
                    "params": {
                        "min_percentile": 0.001,
                        "max_percentile": 99.999
                    }
                }
            },
            "activations": {
                "mode": quantization_type,
                "logarithm_scale": logarithm_scale
            },
            "weights": {
                "mode": quantization_type,
                "signed": True,
                "logarithm_scale": logarithm_scale
            }
        }
    })

    class RandDatasetMock:
        def __getitem__(self, index):
            return torch.rand(*SAMPLE_SIZE)

        def __len__(self):
            return 4

    data_loader = torch.utils.data.DataLoader(RandDatasetMock(),
                                              batch_size=1,
                                              shuffle=False,
                                              drop_last=True)

    class SquadInitializingDataloader(
            nncf.torch.initialization.PTInitializingDataLoader):
        def get_inputs(self, batch):
            return batch, {}

        def get_target(self, batch):
            return None

    initializing_data_loader = SquadInitializingDataloader(data_loader)
    init_range = nncf.config.structures.QuantizationRangeInitArgs(
        initializing_data_loader)
    nncf_config.register_extra_structs([init_range])
    register_bn_adaptation_init_args(nncf_config)

    return nncf_config
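A typical consumer of this helper, as a sketch: MyModel is a placeholder whose input matches SAMPLE_SIZE, and 'symmetric' stands in for one of the two mode strings the config accepts.

config = get_config_for_logarithm_scale(logarithm_scale=True,
                                        quantization_type='symmetric')
model, compression_ctrl = create_compressed_model_and_algo_for_test(
    MyModel(), config)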
Example 16
def test_quantization_configs__with_precisions_list():
    desc = TestPrecisionInitDesc()
    model = desc.model_creator()
    config = desc.config
    register_bn_adaptation_init_args(config)
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    desc.check_precision_init(compression_ctrl)
Example 17
def test_can_compress_with_config_and_resume_of_old_checkpoint():
    model = SingleConv2dIdentityModel()
    config = get_basic_quantization_config(
        input_info={"sample_size": [1, 3, 100, 100]})
    register_bn_adaptation_init_args(config)
    create_compressed_model_and_algo_for_test(model,
                                              config,
                                              compression_state=old_style_sd)
Example 18
    def test_unified_scales_are_identical_in_onnx(self, tmp_path):
        # pylint:disable=no-member
        nncf_config = get_quantization_config_without_range_init(model_size=1)
        nncf_config["compression"]["quantize_outputs"] = True
        nncf_config["input_info"] = [
            {
                "sample_size": [1, 1, 1, 2],
            },
        ]
        nncf_config["target_device"] = "VPU"
        register_bn_adaptation_init_args(nncf_config)

        compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
            SimplerModelForUnifiedScalesTesting(), nncf_config)

        with torch.no_grad():
            for quant_info in compression_ctrl.non_weight_quantizers.values():
                if isinstance(quant_info.quantizer_module_ref,
                              AsymmetricQuantizer):
                    quant_info.quantizer_module_ref.input_range *= torch.abs(
                        torch.rand_like(
                            quant_info.quantizer_module_ref.input_range))
                else:
                    quant_info.quantizer_module_ref.scale *= torch.abs(
                        torch.rand_like(quant_info.quantizer_module_ref.scale))

        test_input1 = torch.ones([1, 1, 1, 2])
        compressed_model.forward(test_input1)

        onnx_path = str(tmp_path / "model.onnx")
        compression_ctrl.export_model(onnx_path)

        onnx_model = onnx.load(onnx_path)

        fq_nodes = TestsWithONNXInspection.get_fq_nodes(onnx_model)
        eltwise_dominator_predicate = partial(
            TestsWithONNXInspection.immediately_dominates_add_or_mul,
            graph=onnx_model.graph)
        eltwise_fq_nodes = list(filter(eltwise_dominator_predicate, fq_nodes))

        cat_dominator_predicate = partial(
            TestsWithONNXInspection.immediately_dominates_cat,
            graph=onnx_model.graph)
        cat_fq_nodes = list(filter(cat_dominator_predicate, fq_nodes))

        fq_nodes_grouped_by_output = TestsWithONNXInspection.group_nodes_by_output_target(
            eltwise_fq_nodes + cat_fq_nodes, onnx_model.graph)

        for unified_scale_group in fq_nodes_grouped_by_output:
            inputs = [
                resolve_constant_node_inputs_to_values(fq_node,
                                                       onnx_model.graph)
                for fq_node in unified_scale_group
            ]
            for inputs_dict in inputs[1:]:
                curr_values = list(inputs_dict.values())
                ref_values = list(inputs[0].values())
                assert curr_values == ref_values  # All inputs for unified scale quantizers must be equal
Example 19
    def test_quantize_network(self, desc: ModelDesc, _case_config):
        model = desc.model_builder()

        config = get_basic_quantization_config(_case_config.quant_type, input_sample_sizes=desc.input_sample_sizes)
        register_bn_adaptation_init_args(config)
        compressed_model, _ = \
            create_compressed_model_and_algo_for_test(model, config, dummy_forward_fn=desc.dummy_forward_fn,
                                                      wrap_inputs_fn=desc.wrap_inputs_fn)
        check_model_graph(compressed_model, desc.dot_filename, _case_config.graph_dir)
Example 20
def test_debug_mode():
    config = get_quantization_config_without_range_init()
    register_bn_adaptation_init_args(config)
    model = BasicConvTestModel()
    with nncf_debug():
        model, _ = create_compressed_model_and_algo_for_test(model, config)
        model.forward(
            torch.zeros(BasicConvTestModel.INPUT_SIZE,
                        device=next(model.parameters()).device))
Example 21
    def test_can_export_compressed_model(self, tmp_path, config_provider, model_provider):
        test_path = str(tmp_path.joinpath('test.onnx'))
        model = model_provider()
        config = config_provider()
        register_bn_adaptation_init_args(config)
        _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

        compression_ctrl.export_model(test_path)
        assert os.path.exists(test_path)
Example 22
def test_renamed_activation_quantizer_storage_in_state_dict():
    model = SingleConv2dIdentityModel()
    config = get_basic_quantization_config(
        input_info={"sample_size": [1, 3, 100, 100]})
    register_bn_adaptation_init_args(config)
    compressed_model, _ = create_compressed_model_and_algo_for_test(
        model, config)

    with pytest.deprecated_call():
        _ = load_state(compressed_model, old_style_sd, is_resume=True)
Example 23
def test_resnet18__with_not_qinput(_case_config):
    model = test_models.ResNet18()
    input_shape = [1, 3, 32, 32]

    config = get_basic_quantization_config(_case_config.quant_type, input_sample_sizes=input_shape)
    config["compression"].update({"quantize_inputs": False})
    register_bn_adaptation_init_args(config)

    compressed_model, _ = create_compressed_model_and_algo_for_test(model, config)
    check_model_graph(compressed_model, 'resnet18_no_qinput.dot', _case_config.graph_dir)
Example 24
def test_output_quantization(_case_config):
    model = test_models.UNet()
    input_shape = [1, 3, 360, 480]

    config = get_basic_quantization_config(_case_config.quant_type, input_sample_sizes=input_shape)
    config["compression"].update({"quantize_outputs": True})
    register_bn_adaptation_init_args(config)

    compressed_model, _ = create_compressed_model_and_algo_for_test(model, config)
    check_model_graph(compressed_model, 'unet_qoutput.dot', _case_config.graph_dir)
Example 25
def test_context_independence(model_name, model_builder, input_size, _case_config):

    config = get_basic_quantization_config(_case_config.quant_type, input_sample_sizes=input_size[0])
    register_bn_adaptation_init_args(config)

    compressed_models = [create_compressed_model_and_algo_for_test(model_builder[0](), config)[0],
                         create_compressed_model_and_algo_for_test(model_builder[1](), config)[0]]

    for i, compressed_model in enumerate(compressed_models):
        check_model_graph(compressed_model, model_name[i], _case_config.graph_dir)
Example 26
def test_multiple_forward():
    # Check that all convolution nodes in the model have op_address and layer_attributes
    # when a single module is called multiple times in forward
    model = TestModelMultipleForward()
    config = get_basic_sparsity_plus_quantization_config()
    register_bn_adaptation_init_args(config)
    sparse_quantized_model, _ = create_compressed_model_and_algo_for_test(model, config)
    graph = sparse_quantized_model.get_original_graph()
    for node in list(graph.get_all_nodes())[1:-2]:
        assert node.layer_attributes is not None
Example 27
    def test_weight_and_act_quantizer_scale_unification(self, tmp_path):
        # pylint:disable=no-member
        nncf_config = get_quantization_config_without_range_init(model_size=1)
        nncf_config["input_info"] = [
            {
                "sample_size": [1, 5],
                "type": "long",
                "filler": "zeros"
            },
        ]
        nncf_config["target_device"] = "VPU"
        register_bn_adaptation_init_args(nncf_config)

        compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
            TwoEmbeddingAddModel(), nncf_config)

        with torch.no_grad():
            for quant_module in compression_ctrl.all_quantizations.values():
                if isinstance(quant_module, AsymmetricQuantizer):
                    quant_module.input_range *= torch.abs(
                        torch.rand_like(quant_module.input_range))
                else:
                    quant_module.scale *= torch.abs(
                        torch.rand_like(quant_module.scale))

        test_input1 = torch.ones([1, 5], dtype=torch.long)
        compressed_model.forward(test_input1)

        onnx_path = str(tmp_path / "model.onnx")
        compression_ctrl.export_model(onnx_path)

        onnx_model = onnx.load(onnx_path)

        fq_nodes = TestsWithONNXInspection.get_fq_nodes(onnx_model)
        eltwise_dominator_predicate = partial(
            TestsWithONNXInspection.immediately_dominates_add_or_mul,
            graph=onnx_model.graph)
        embedding_dominator_predicate = partial(
            TestsWithONNXInspection.immediately_dominates_embedding,
            graph=onnx_model.graph)
        eltwise_fq_nodes = list(filter(eltwise_dominator_predicate, fq_nodes))
        embedding_weight_fq_nodes = list(
            filter(embedding_dominator_predicate, fq_nodes))

        fq_nodes_with_expected_unified_scales = embedding_weight_fq_nodes + eltwise_fq_nodes

        unified_fq_node_inputs = [
            resolve_constant_node_inputs_to_values(fq_node, onnx_model.graph)
            for fq_node in fq_nodes_with_expected_unified_scales
        ]
        for inputs_dict in unified_fq_node_inputs[1:]:
            curr_values = list(inputs_dict.values())
            ref_values = list(unified_fq_node_inputs[0].values())
            assert curr_values == ref_values  # All inputs for unified scale quantizers must be equal
Example 28
def test_can_create_quant_loss_and_scheduler():
    config = get_quantization_config_without_range_init()
    register_bn_adaptation_init_args(config)
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        BasicConvTestModel(), config)

    loss = compression_ctrl.loss
    assert isinstance(loss, PTCompressionLoss)

    scheduler = compression_ctrl.scheduler
    assert isinstance(scheduler, CompressionScheduler)
Example 29
def test_synthetic_model_quantization(synthetic_model_desc: IModelDesc):
    config = get_basic_quantization_config(input_sample_sizes=synthetic_model_desc.get_input_sample_sizes(),
                                           input_info=synthetic_model_desc.get_input_info())
    register_bn_adaptation_init_args(config)

    model = synthetic_model_desc.get_model()
    compressed_model, _ = create_compressed_model_and_algo_for_test(
        model, config, wrap_inputs_fn=synthetic_model_desc.get_wrap_inputs_fn())

    check_model_graph(compressed_model, synthetic_model_desc.get_dot_filename(),
                      os.path.join('quantized', 'synthetic_model'))
Example 30
def create_config():
    config = get_empty_config()
    config['compression'] = {
        'algorithm': 'quantization',
        'initializer': {
            'range': {
                'num_init_samples': 1
            }
        }
    }
    register_bn_adaptation_init_args(config)
    return config
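Usage sketch for this helper: before a model is created with it, a range-init data loader still has to be registered, mirroring get_config_for_logarithm_scale above. init_loader is a placeholder for a PTInitializingDataLoader subclass wrapping a real torch DataLoader.

config = create_config()
# Register the data loader used by the 'range' initializer configured above.
config.register_extra_structs(
    [nncf.config.structures.QuantizationRangeInitArgs(init_loader)])
model, ctrl = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config)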