def test_quantization_with_finetuning_algo(models, tmp_path, model_params):
    model_name, model_framework, algo_name, preset, expected_accuracy = model_params

    if not TORCH_AVAILABLE:
        warnings.warn(
            UserWarning(
                'Skipping layerwise finetuning test since torch is not importable'
            ))
        return

    additional_params = {
        'use_layerwise_tuning': True,
        'batch_size': 20,
        'num_samples_for_tuning': 40,
    }
    algorithm_config = make_algo_config(algo_name,
                                        preset,
                                        additional_params=additional_params)
    model = models.get(model_name, model_framework, tmp_path)
    reference_name = model_name + '_quantized_tuned'
    metrics, quantized_model = run_algo(model, model_name, algorithm_config,
                                        tmp_path, reference_name)
    for metric_name in metrics:
        print('{}: {:.4f}'.format(metric_name, metrics[metric_name]))

    assert metrics == pytest.approx(expected_accuracy, abs=0.006)
    check_model(tmp_path,
                quantized_model,
                reference_name,
                model_framework,
                check_weights=False)
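

# NOTE: a minimal sketch of the make_algo_config helper these tests rely on,
# inferred from its call sites in this file; the real helper in the suite may
# build the config differently. Assumes addict's Dict (used elsewhere in this
# file) and an illustrative default stat_subset_size.
def make_algo_config_sketch(algo_name, preset, additional_params=None):
    config = Dict({
        'name': algo_name,
        'params': {
            'target_device': 'CPU',
            'preset': preset,
            'stat_subset_size': 300,
        },
    })
    if additional_params is not None:
        config['params'].update(additional_params)
    return config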
def test_build_quantization_graph_with_ignored_blocks(tmp_path, models, model_name, model_framework):
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
    quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model)

    check_model(tmp_path, quantization_model, model_name + '_ig_pt', model_framework)
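

# NOTE (hypothetical): model_name and model_framework are supplied by pytest
# parametrization in the original suite; the model list below is an assumption
# for illustration only, reusing names that appear later in this file.
SKETCH_MODELS = [
    ('resnet_example', 'pytorch'),
    ('googlenet_example', 'pytorch'),
]

@pytest.mark.parametrize('model_name, model_framework', SKETCH_MODELS)
def test_model_loads_sketch(tmp_path, models, model_name, model_framework):
    # Smoke check: each parametrized model can be fetched and loaded.
    model = models.get(model_name, model_framework, tmp_path)
    assert load_model(model.model_params) is not None
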
def test_sparsity_with_finetuning_algo(models, tmp_path, model_params):
    model_name, model_framework, algo_name, preset, sparsity_level, expected_accuracy = model_params

    if not TORCH_AVAILABLE:
        warnings.warn(UserWarning('Skipping layerwise finetuning test since torch is not importable'))
        return

    additional_params = {
        'sparsity_level': sparsity_level,
        'stat_subset_size': 300,
        'use_layerwise_tuning': True,
        'weights_lr': 1e-5,
        'bias_lr': 1e-3,
        'batch_size': 20,
        'num_samples_for_tuning': 40,
        'tuning_iterations': 1,
        'use_ranking_subset': False,
    }
    algorithm_config = make_algo_config(algo_name, preset, additional_params=additional_params)
    model = models.get(model_name, model_framework, tmp_path)
    reference_name = model_name + '_sparse_tuned'
    metrics, sparse_model = run_algo(model, model_name, algorithm_config, tmp_path, reference_name)
    check_model_sparsity_level(sparse_model, None, sparsity_level, strict=True)
    for metric_name in metrics:
        print('{}: {:.4f}'.format(metric_name, metrics[metric_name]))

    assert metrics == pytest.approx(expected_accuracy, abs=0.006)
    check_model(tmp_path, sparse_model, reference_name,
                model_framework, check_weights=False)
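

# NOTE: a minimal sketch of the check performed by check_model_sparsity_level,
# assuming sparsity is measured as the share of zero-valued weight elements;
# the real helper may select nodes and aggregate differently.
def sparsity_level_sketch(weight_tensors):
    import numpy as np
    total = sum(w.size for w in weight_tensors)
    zeros = sum(int(np.count_nonzero(w == 0)) for w in weight_tensors)
    return zeros / total if total else 0.0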
def test_build_quantization_graph(tmp_path, models, model_name, model_framework, target_device):
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params, target_device=target_device)

    if target_device == 'GNA':
        hardware_config = HardwareConfig.from_json(GNA_CONFIG_PATH.as_posix())
    else:
        hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())

    quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model)

    check_model(tmp_path, quantization_model, model_name, model_framework)
def cut_fq_node(model, node_list, graph_transformer, tmp_path):
    model_ = load_model(model.model_params)
    quantized_model = graph_transformer.insert_fake_quantize(model_)
    cropped_model = quantized_model
    for node_name in node_list:
        node = get_node_by_name(cropped_model, node_name)
        for parent_node in nu.get_node_inputs(node):
            if parent_node and parent_node.type == 'FakeQuantize':
                # Cut from the already-cropped model so that removals
                # accumulate across the node_list loop.
                cropped_model, *_ = graph_transformer.remove_fq_nodes(cropped_model, [parent_node.name])
                break

    check_model(tmp_path, cropped_model, model.model_name + '_cut_fq', model.framework)
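

# NOTE (hypothetical usage): cut_fq_node is a helper rather than a test; a
# driver would look roughly like this. The node name is borrowed from the
# ignored-params test below purely for illustration.
def test_cut_fq_node_sketch(models, tmp_path):
    model = models.get(TEST_MODEL_NAME, TEST_MODEL_FRAMEWORK, tmp_path)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
    graph_transformer = GraphTransformer(hardware_config)
    cut_fq_node(model, ['Conv_11/WithoutBiases'], graph_transformer, tmp_path)
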
def test_build_quantization_graph_with_ignored_params(tmp_path, models,
                                                      model_name,
                                                      model_framework):
    if model_name in CASCADE_MAP:
        model = models.get_cascade(model_name, model_framework, tmp_path,
                                   CASCADE_MAP[model_name])
    else:
        model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())

    # Non-cascade models get a generic operations-based ignore list; cascade
    # models (currently only 'mtcnn') get per-submodel configs below.
    if model_name not in CASCADE_MAP:
        ignored_params = {
            'operations': [{
                'type': 'Add',
            }, {
                'type': 'Convolution',
                'attributes': {
                    'output': 1280,
                    'group': 1
                }
            }]
        }

    if model_name == 'resnet_example':
        ignored_params['scope'] = [
            'Conv_11/WithoutBiases', 'Conv_29/WithoutBiases'
        ]
    elif model_name == 'googlenet_example':
        node_name = 'Conv_10/WithoutBiases'
        ignored_params['scope'] = [node_name]
    elif model_name == 'mtcnn':
        ignored_params = {
            'pnet': {
                'scope': ['conv1/WithoutBiases', 'conv3/WithoutBiases']
            },
            'rnet': {
                'skip_model': True
            },
            'onet': {
                'operations': [{
                    'type': 'MatMul'
                }]
            }
        }

    quantization_model = GraphTransformer(
        hardware_config).insert_fake_quantize(model, ignored_params)

    print(len(get_nodes_by_type(quantization_model, ['FakeQuantize'])))
    check_model(tmp_path, quantization_model, model_name + '_ig_params',
                model_framework)


def test_statistics_collector_subsets(tmp_path, models, model_name, model_framework,
                                      quantization_mode, inplace_statistics, algorithm,
                                      preset, granularity, add_output_nodes, type_max, type_min):
    model, engine, collector, algo_config = create_(tmp_path, models, model_name, model_framework,
                                                    quantization_mode, algorithm.name, preset,
                                                    granularity, type_max, type_min)
    algo = algorithm(algo_config, engine)
    algo._config['inplace_statistics'] = inplace_statistics
    algo.register_statistics(model, collector)
    statistic_graph_builder = StatisticGraphBuilder()
    act_stats_layout, stat_aliases = merge_stats_by_algo_names([algorithm.name], collector._layout_by_algo)
    model_with_nodes, nodes_names, _ = statistic_graph_builder.insert_statistic(model, act_stats_layout, stat_aliases)
    ir_name = f'{model_name}_stat_{type_max}_{type_min}' if type_min is not None \
        else f'{model_name}_stat_mean'
    check_model(tmp_path, model_with_nodes, ir_name, model_framework)
    assert len(set(nodes_names)) == add_output_nodes


def test_outlier_channel_splitting_algo(models, tmp_path,
                                        weights_expansion_ratio):
    algorithm_config = Dict({
        'weights_expansion_ratio': weights_expansion_ratio,
    })

    model = models.get(TEST_MODEL_NAME, TEST_MODEL_FRAMEWORK, tmp_path)
    model = load_model(model.model_params)

    algorithm = OutlierChannelSplitting(algorithm_config, None)
    algorithm.run(model)

    check_model(tmp_path,
                model,
                TEST_MODEL_NAME + '_{}'.format(weights_expansion_ratio),
                TEST_MODEL_FRAMEWORK,
                check_weights=True)
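

# NOTE: a conceptual numpy sketch of outlier channel splitting (Zhao et al.,
# 2019), the idea behind the OutlierChannelSplitting algorithm above: the
# channel holding the largest-magnitude weight is halved and duplicated, which
# shrinks the dynamic range that quantization must cover. Simplified for
# illustration; preserving the network function exactly also requires
# adjusting the layer that consumes these weights.
import numpy as np

def split_outlier_channel_sketch(weights):
    # weights: (out_channels, ...) array; pick the channel with the largest
    # absolute weight value.
    channel = np.unravel_index(np.argmax(np.abs(weights)), weights.shape)[0]
    halved = weights[channel] / 2.0
    split = weights.copy()
    split[channel] = halved
    # Append the duplicated half-channel; the two copies sum to the original.
    return np.concatenate([split, halved[None]], axis=0)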