Example #1
0
def test_quantization_preset_with_scope_overrides():
    """A per-scope weight override must win over the global 'mixed' preset
    for the matching layer only; all other quantizers follow the preset."""
    model = get_basic_two_conv_test_model()
    config = get_basic_quantization_config()
    config['target_device'] = "TRIAL"
    config['compression'] = {
        'algorithm': 'quantization',
        'preset': 'mixed',
        'scope_overrides': {
            'weights': {
                'conv2d': {
                    "mode": "asymmetric",
                }
            }
        }
    }
    compressed, _ = create_compressed_model_and_algo_for_test(
        model, config, force_no_init=True)

    act_quantizers, weight_quantizers = get_quantizers(compressed)
    # The 'mixed' preset makes every activation quantizer asymmetric.
    assert all(q.mode == 'asymmetric' for q in act_quantizers)
    # Only the overridden conv2d kernel becomes asymmetric; all other
    # weight quantizers keep the preset's symmetric mode.
    for q in weight_quantizers:
        expected_mode = ('asymmetric' if q.name == 'conv2d_kernel_quantizer'
                         else 'symmetric')
        assert q.mode == expected_mode
Example #2
0
def test_quantization_configs__with_defaults():
    """With an untouched basic config, compression must produce a
    QuantizationController and the default quantizer specs."""
    config = get_basic_quantization_config()
    model = get_basic_conv_test_model()

    compressed_model, ctrl = create_compressed_model_and_algo_for_test(
        model, config, force_no_init=True)

    assert isinstance(ctrl, QuantizationController)
    check_default_qspecs(compressed_model)
Example #3
0
def test_quantization_configs__disable_overflow_fix():
    """Setting 'overflow_fix': 'disable' must yield quantizer specs with the
    overflow fix turned off."""
    model = get_basic_conv_test_model()

    config = get_basic_quantization_config()
    config['compression']['overflow_fix'] = 'disable'
    compressed_model, ctrl = create_compressed_model_and_algo_for_test(
        model, config, force_no_init=True)

    assert isinstance(ctrl, QuantizationController)
    check_specs_for_disabled_overflow_fix(compressed_model)
def test_checkpoint_callback_make_checkpoints(mocker, tmp_path):
    """Train with periodic checkpointing via CheckpointManagerCallback, then
    restore from the checkpoint and verify that quantizer setup, controller
    state and model weights all round-trip intact.

    Uses pytest-mock's ``mocker`` and pytest's ``tmp_path`` fixtures.
    """
    save_freq = 2  # a checkpoint is expected every 2 epochs
    config = get_basic_quantization_config()
    # Spy captures the quantizer setup produced during model creation.
    gen_setup_spy = mocker.spy(QuantizationBuilder, '_get_quantizer_setup')

    model, compression_ctrl = create_compressed_model_and_algo_for_test(
        get_basic_conv_test_model(), config, force_no_init=True)
    assert isinstance(compression_ctrl, QuantizationController)

    quantizer_setup = gen_setup_spy.spy_return
    compression_callbacks = create_compression_callbacks(compression_ctrl,
                                                         log_tensorboard=False)
    dataset_len = 8

    # Random inputs/targets matching the model's I/O shapes (batch dim added).
    dummy_x = tf.random.normal((dataset_len, ) + model.input_shape[1:])
    dummy_y = tf.random.normal((dataset_len, ) + model.output_shape[1:])

    model.compile(loss=tf.losses.CategoricalCrossentropy())

    ckpt_path = tmp_path / 'checkpoint'
    # The checkpoint tracks both model weights and the compression state.
    ckpt = tf.train.Checkpoint(
        model=model, compression_state=TFCompressionState(compression_ctrl))
    model.fit(dummy_x,
              dummy_y,
              epochs=5,
              batch_size=2,
              callbacks=[
                  CheckpointManagerCallback(ckpt, str(ckpt_path), save_freq),
                  *compression_callbacks
              ])

    # Saved files must match the reference listing for this save frequency.
    assert sorted(os.listdir(ckpt_path)) == REF_CKPT_DIR[save_freq]

    new_compression_state = load_compression_state(ckpt_path)

    # Rebuild a fresh model from the restored compression state + checkpoint.
    new_model, new_compression_ctrl = create_compressed_model_and_algo_for_test(
        get_basic_conv_test_model(), config, new_compression_state)
    new_model.compile(loss=tf.losses.CategoricalCrossentropy())
    new_ckpt = tf.train.Checkpoint(
        model=new_model,
        compression_state=TFCompressionState(new_compression_ctrl))
    load_checkpoint(new_ckpt, ckpt_path)

    builder = QuantizationBuilder(config)
    builder.load_state(new_compression_state['builder_state'])
    # pylint:disable=protected-access
    new_quantizer_setup = builder._quantizer_setup

    # Restored setup, controller state and weights must match the originals.
    assert _quantization_setup_cmp(quantizer_setup, new_quantizer_setup)
    assert new_compression_ctrl.get_state() == compression_ctrl.get_state()
    assert tf.reduce_all([
        tf.reduce_all(w_new == w)
        for w_new, w in zip(new_model.weights, model.weights)
    ])
Example #5
0
def test_early_exit_compression_training_loop(max_accuracy_degradation,
                                              maximal_total_epochs=100, uncompressed_model_accuracy=0.2,
                                              steps_per_epoch=20, img_size=10):
    """Run the 'early_exit' accuracy-aware training loop on a small regression
    model and check the final accuracy degradation stays within the allowed
    budget (absolute or relative, whichever key ``max_accuracy_degradation``
    supplies)."""
    set_random_seed(42)  # deterministic run so the accuracy check is stable
    model = get_simple_conv_regression_model(img_size)
    dataset = get_const_target_mock_regression_dataset(img_size=img_size,
                                                       num_samples=steps_per_epoch)

    config = get_basic_quantization_config(img_size)
    params = {
        "maximal_total_epochs": maximal_total_epochs,
    }
    # Injects either maximal_absolute_* or maximal_relative_* degradation key.
    params.update(max_accuracy_degradation)
    accuracy_aware_config = {
        "accuracy_aware_training": {
            "mode": "early_exit",
            "params": params
        }
    }
    config.update(accuracy_aware_config)
    config = register_default_init_args(config, dataset, batch_size=1)
    compress_model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    compression_callbacks = create_compression_callbacks(compression_ctrl, log_tensorboard=False)
    compress_model.add_loss(compression_ctrl.loss)

    def inverse_loss(y_true, y_pred):
        # Metric in (0, 1]; higher is better — used as the "accuracy" proxy.
        return 1 / (1 + (y_true - y_pred) ** 2)

    compress_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
                           loss=tf.keras.losses.MeanSquaredError(),
                           metrics=inverse_loss)

    result_dict_to_val_metric_fn = lambda results: results['inverse_loss']

    compress_model.accuracy_aware_fit(dataset,
                                      compression_ctrl,
                                      nncf_config=config,
                                      callbacks=compression_callbacks,
                                      initial_epoch=0,
                                      steps_per_epoch=steps_per_epoch,
                                      uncompressed_model_accuracy=uncompressed_model_accuracy,
                                      result_dict_to_val_metric_fn=result_dict_to_val_metric_fn)
    original_model_accuracy = compress_model.original_model_accuracy
    compressed_model_accuracy = result_dict_to_val_metric_fn(compress_model.evaluate(dataset, return_dict=True))

    # Check the degradation budget the parametrization asked for.
    if "maximal_absolute_accuracy_degradation" in max_accuracy_degradation:
        assert (original_model_accuracy - compressed_model_accuracy) <= \
               max_accuracy_degradation["maximal_absolute_accuracy_degradation"]
    else:
        assert (original_model_accuracy - compressed_model_accuracy) / original_model_accuracy * 100 <= \
               max_accuracy_degradation["maximal_relative_accuracy_degradation"]
Example #6
0
def test_quantize_outputs_removal():
    """FakeQuantize layers placed on model outputs must be removed, leaving
    only the input quantizer in the compressed model."""
    config = get_basic_quantization_config()
    sample_size = [2, 32, 32, 3]
    model = get_quantize_outputs_removal_test_model(sample_size)

    model, _ = create_compressed_model_and_algo_for_test(model,
                                                         config,
                                                         force_no_init=True)
    ref_fake_quantize_layers = ['input/fake_quantize']
    actual_fake_quantize_layers = [
        layer.name for layer in model.layers
        if isinstance(layer, FakeQuantize)
    ]
    # List equality already covers content, order and length, so the former
    # separate len() comparison was redundant and has been dropped.
    assert actual_fake_quantize_layers == ref_fake_quantize_layers
def test_eltwise_unified_scales_for_vpu():
    """On VPU, inputs of a linked element-wise op must share a unified scale:
    only 2 fake-quantize layers are inserted while 8 quantizations occur."""
    nncf_config = get_basic_quantization_config()
    nncf_config["target_device"] = "VPU"
    input_shapes = [[1, 1, 1, 1], [1, 1, 1, 1]]

    model = get_eltwise_quantizer_linking_test_model(input_shapes)
    compressed, _ = create_compressed_model_and_algo_for_test(
        model, nncf_config, force_no_init=True)

    assert len(collect_fake_quantize_layers(compressed)) == 2
    assert get_total_quantizations(compressed) == 8
Example #8
0
def test_export_overflow_fix(sf_mode):
    """Before export, the overflow fix keeps affected weight quantizers in
    half-range mode (with narrow_range disabled); exporting the model must
    reset half_range to False while preserving narrow_range."""
    model = get_basic_two_conv_test_model()
    config = get_basic_quantization_config()
    config['compression'].update({'overflow_fix': sf_mode})

    def _fix_active(layer_idx):
        # The fix applies to every layer for 'enable', only to the first
        # layer for 'first_layer_only', and to no layer otherwise.
        if sf_mode == 'enable':
            return True
        return sf_mode == 'first_layer_only' and layer_idx == 0

    def _weight_qspec(narrow_range, half_range):
        # All weight quantizers share the symmetric/8-bit/per-channel base.
        return TFQuantizerSpec(mode=QuantizationMode.SYMMETRIC,
                               num_bits=8,
                               signedness_to_force=True,
                               per_channel=True,
                               narrow_range=narrow_range,
                               half_range=half_range)

    compression_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config, force_no_init=True)
    aqs_before, wqs_before = get_quantizers(compression_model)

    for idx, wq in enumerate(wqs_before):
        active = _fix_active(idx)
        compare_qspecs(_weight_qspec(narrow_range=not active,
                                     half_range=active), wq)

    ref_activation_qspec = TFQuantizerSpec(mode=QuantizationMode.SYMMETRIC,
                                           num_bits=8,
                                           signedness_to_force=None,
                                           per_channel=False,
                                           narrow_range=False,
                                           half_range=False)
    for aq in aqs_before:
        compare_qspecs(ref_activation_qspec, aq)

    # Export must clear half_range on weight quantizers but keep narrow_range.
    compression_ctrl.export_model('/tmp/test.pb')
    aqs_after, wqs_after = get_quantizers(compression_model)

    for idx, wq in enumerate(wqs_after):
        compare_qspecs(_weight_qspec(narrow_range=not _fix_active(idx),
                                     half_range=False), wq)
    for aq in aqs_after:
        compare_qspecs(ref_activation_qspec, aq)
def test_unified_scales_with_concat(target_device, model_creator,
                                    ref_aq_module_count, ref_quantizations):
    """Concat-based models must get the expected number of unified activation
    quantizer modules and total quantizations for the given target device."""
    nncf_config = get_basic_quantization_config()
    nncf_config["target_device"] = target_device
    input_shapes = [[1, 4, 1, 1], [1, 4, 1, 1]]

    compressed, _ = create_compressed_model_and_algo_for_test(
        model_creator(input_shapes), nncf_config, force_no_init=True)

    assert len(collect_fake_quantize_layers(compressed)) == ref_aq_module_count
    assert get_total_quantizations(compressed) == ref_quantizations
Example #10
0
def test_quantization_preset(data):
    """A preset (optionally with one overridden parameter) must set the
    expected activation/weight quantization modes on the target device."""
    model = get_basic_conv_test_model()

    config = get_basic_quantization_config()
    config['target_device'] = data['target_device']
    compression_section = {
        'algorithm': 'quantization',
        'preset': data['preset']
    }
    compression_section.update(data['overrided_param'])
    config['compression'] = compression_section
    compressed, _ = create_compressed_model_and_algo_for_test(
        model, config, force_no_init=True)

    act_quantizers, weight_quantizers = get_quantizers(compressed)
    assert all(q.mode == data['expected_activations_q'] for q in act_quantizers)
    assert all(q.mode == data['expected_weights_q'] for q in weight_quantizers)
def test_shared_op_unified_scales(target_device):
    """A model with a shared convolution must yield unified scales:
    5 fake-quantize layers but 8 quantizations in total — and inference
    must still produce the expected output shape."""
    nncf_config = get_basic_quantization_config()
    nncf_config["target_device"] = target_device

    compressed, _ = create_compressed_model_and_algo_for_test(
        get_shared_conv_test_model(), nncf_config, force_no_init=True)

    assert len(collect_fake_quantize_layers(compressed)) == 5
    assert get_total_quantizations(compressed) == 8

    # Smoke-check that the quantized shared op runs end to end.
    inputs = [tf.random.uniform(shape=(1, 5, 5, 3), dtype=tf.float32),
              tf.random.uniform(shape=(1, 10, 10, 3), dtype=tf.float32)]
    prediction = compressed.predict(inputs)
    assert prediction.shape == (1, 375)
Example #12
0
def test_quantization_configs__custom():
    """Custom per-section settings in the config must be reflected in the
    resulting quantizer specs for both weights and activations."""
    model = get_basic_conv_test_model()

    config = get_basic_quantization_config()
    config['target_device'] = 'TRIAL'
    config['compression'].update({
        "weights": {
            "mode": "asymmetric",
            "per_channel": True,
            "bits": 4
        },
        "activations": {
            "mode": "asymmetric",
            "bits": 4,
            "signed": True,
        },
    })
    compressed, ctrl = create_compressed_model_and_algo_for_test(
        model, config, force_no_init=True)

    assert isinstance(ctrl, QuantizationController)
    act_quantizers, weight_quantizers = get_quantizers(compressed)

    expected_weight_spec = TFQuantizerSpec(mode=QuantizationMode.ASYMMETRIC,
                                           num_bits=4,
                                           signedness_to_force=None,
                                           per_channel=True,
                                           narrow_range=True,
                                           half_range=False)
    for quantizer in weight_quantizers:
        compare_qspecs(expected_weight_spec, quantizer)

    expected_act_spec = TFQuantizerSpec(mode=QuantizationMode.ASYMMETRIC,
                                        num_bits=4,
                                        signedness_to_force=True,
                                        per_channel=False,
                                        narrow_range=False,
                                        half_range=False)
    for quantizer in act_quantizers:
        compare_qspecs(expected_act_spec, quantizer)
Example #13
0
def test_quantize_inputs():
    """Every model input must receive its own FakeQuantize layer; the whole
    model is expected to contain 17 fake-quantize layers in total."""
    config = get_basic_quantization_config()
    config['target_device'] = 'TRIAL'
    input_shapes = [[2, 32, 32, 3] for _ in range(5)]
    model = get_quantize_inputs_test_model(input_shapes)

    model, _ = create_compressed_model_and_algo_for_test(model,
                                                         config,
                                                         force_no_init=True)
    expected_input_fq_names = {
        'rescaling/fake_quantize', 'input_2/fake_quantize',
        'input_3/fake_quantize', 'input_4/fake_quantize',
        'input_5/fake_quantize'
    }
    expected_total_fq_count = 17

    actual_fq_names = {
        layer.name
        for layer in model.layers if isinstance(layer, FakeQuantize)
    }
    assert expected_input_fq_names <= actual_fq_names
    assert len(actual_fq_names) == expected_total_fq_count
def test_quantization_configs__on_resume_with_compression_state(
        tmp_path, mocker):
    """Resuming from a saved compression state must skip initialization and
    quantizer-setup generation, and yield the same quantizer setup as the
    original run.

    Uses pytest's ``tmp_path`` and pytest-mock's ``mocker`` fixtures.
    """
    model = get_basic_conv_test_model()
    config = get_basic_quantization_config()
    # Spies track whether init / setup generation happen on each creation.
    init_spy = mocker.spy(QuantizationBuilder, 'initialize')
    gen_setup_spy = mocker.spy(QuantizationBuilder, '_get_quantizer_setup')
    dataset = get_dataset_for_test(shape=[4, 4, 1])
    config = register_default_init_args(config, dataset, 10)

    # First (fresh) run: both init and setup generation must occur.
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)
    assert isinstance(compression_ctrl, QuantizationController)
    init_spy.assert_called()
    gen_setup_spy.assert_called()
    saved_quantizer_setup = gen_setup_spy.spy_return
    check_serialization(saved_quantizer_setup, _quantization_setup_cmp)

    compression_state_to_load = _save_and_load_compression_state(
        compression_ctrl, tmp_path)

    # Clear call records before the resume run.
    init_spy.reset_mock()
    gen_setup_spy.reset_mock()

    # Second run resumes from the saved state.
    compression_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config, compression_state_to_load)
    assert isinstance(compression_ctrl, QuantizationController)

    # Resume must neither re-initialize nor regenerate the quantizer setup.
    init_spy.assert_not_called()
    gen_setup_spy.assert_not_called()
    check_default_qspecs(compression_model)

    # The setup restored from builder state must match the original.
    builder = QuantizationBuilder(config)
    builder.load_state(compression_state_to_load['builder_state'])
    # pylint:disable=protected-access
    loaded_quantizer_setup = builder._quantizer_setup
    assert _quantization_setup_cmp(loaded_quantizer_setup,
                                   saved_quantizer_setup)
Example #15
0
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
"""

# Do not remove - these imports are for testing purposes.
#pylint:disable=unused-import
import nncf

import tensorflow as tf

from nncf.common.compression import BaseCompressionAlgorithmController
from nncf.tensorflow.helpers.model_creation import create_compressed_model
from tests.tensorflow.quantization.utils import get_basic_quantization_config

# Build a minimal single-conv Keras model to exercise compressed model creation.
inputs = tf.keras.Input(shape=(3, 3, 1))
outputs = tf.keras.layers.Conv2D(filters=3, kernel_size=3)(inputs)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

config = get_basic_quantization_config()
# NOTE(review): per the variable name, passing a state with an empty builder
# state presumably makes create_compressed_model skip the data-driven
# initialization step — confirm against create_compressed_model's contract.
compression_state_to_skip_init = {
    BaseCompressionAlgorithmController.BUILDER_STATE: {}
}
compression_model, compression_ctrl = create_compressed_model(
    model, config, compression_state_to_skip_init)