def test_quantization_with_finetuning_algo(models, tmp_path, model_params):
    """Run a quantization algorithm with layerwise tuning and validate the result.

    ``model_params`` is unpacked into ``(model_name, model_framework,
    algo_name, preset, expected_accuracy)``. The algorithm config is built
    with layerwise tuning switched on, the algorithm is run, every metric is
    printed, the metrics are compared with ``expected_accuracy`` (absolute
    tolerance 0.006) and the produced model is passed to ``check_model``
    with ``check_weights=False``.

    When ``TORCH_AVAILABLE`` is false the test emits a ``UserWarning`` and
    returns without running anything.
    """
    model_name, model_framework, algo_name, preset, expected_accuracy = model_params

    if not TORCH_AVAILABLE:
        skip_notice = UserWarning(
            'Skipping layerwise finetuning test since torch is not importable'
        )
        warnings.warn(skip_notice)
        return

    tuning_overrides = {
        'use_layerwise_tuning': True,
        'batch_size': 20,
        'num_samples_for_tuning': 40,
    }
    algorithm_config = make_algo_config(
        algo_name, preset, additional_params=tuning_overrides
    )
    source_model = models.get(model_name, model_framework, tmp_path)
    reference_name = f'{model_name}_quantized_tuned'

    metrics, quantized_model = run_algo(
        source_model, model_name, algorithm_config, tmp_path, reference_name
    )

    for metric_name, metric_value in metrics.items():
        print(f'{metric_name}: {metric_value:.4f}')

    assert metrics == pytest.approx(expected_accuracy, abs=0.006)
    check_model(
        tmp_path,
        quantized_model,
        reference_name,
        model_framework,
        check_weights=False,
    )
def test_build_quantization_graph_with_ignored_blocks(
        tmp_path, models, model_name, model_framework):
    """Insert FakeQuantize nodes using the CPU config and check the graph.

    The model is fetched from the ``models`` fixture, loaded, transformed by
    ``GraphTransformer.insert_fake_quantize`` and then handed to
    ``check_model`` under the name ``<model_name>_ig_pt``.
    """
    model_entry = models.get(model_name, model_framework, tmp_path)
    loaded_model = load_model(model_entry.model_params)

    cpu_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
    transformer = GraphTransformer(cpu_config)
    fq_model = transformer.insert_fake_quantize(loaded_model)

    check_model(tmp_path, fq_model, f'{model_name}_ig_pt', model_framework)
def test_sparsity_with_finetuning_algo(models, tmp_path, model_params):
    """Run a sparsity algorithm with layerwise tuning and validate the result.

    ``model_params`` is unpacked into ``(model_name, model_framework,
    algo_name, preset, sparsity_level, expected_accuracy)``. After the
    algorithm runs, the sparsity level of the produced model is checked in
    strict mode, every metric is printed, the metrics are compared with
    ``expected_accuracy`` (absolute tolerance 0.006) and the model is passed
    to ``check_model`` with ``check_weights=False``.

    When ``TORCH_AVAILABLE`` is false the test emits a ``UserWarning`` and
    returns without running anything.
    """
    (model_name, model_framework, algo_name,
     preset, sparsity_level, expected_accuracy) = model_params

    if not TORCH_AVAILABLE:
        skip_notice = UserWarning(
            'Skipping layerwise finetuning test since torch is not importable'
        )
        warnings.warn(skip_notice)
        return

    tuning_overrides = {
        'sparsity_level': sparsity_level,
        'stat_subset_size': 300,
        'use_layerwise_tuning': True,
        'weights_lr': 1e-5,
        'bias_lr': 1e-3,
        'batch_size': 20,
        'num_samples_for_tuning': 40,
        'tuning_iterations': 1,
        'use_ranking_subset': False,
    }
    algorithm_config = make_algo_config(
        algo_name, preset, additional_params=tuning_overrides
    )
    source_model = models.get(model_name, model_framework, tmp_path)
    reference_name = f'{model_name}_sparse_tuned'

    metrics, sparse_model = run_algo(
        source_model, model_name, algorithm_config, tmp_path, reference_name
    )
    check_model_sparsity_level(sparse_model, None, sparsity_level, strict=True)

    for metric_name, metric_value in metrics.items():
        print(f'{metric_name}: {metric_value:.4f}')

    assert metrics == pytest.approx(expected_accuracy, abs=0.006)
    check_model(
        tmp_path,
        sparse_model,
        reference_name,
        model_framework,
        check_weights=False,
    )
def test_build_quantization_graph(
        tmp_path, models, model_name, model_framework, target_device):
    """Insert FakeQuantize nodes for the given target device and check the graph.

    The GNA hardware config is used when ``target_device`` equals ``'GNA'``;
    every other value falls back to the CPU hardware config.
    """
    model_entry = models.get(model_name, model_framework, tmp_path)
    loaded_model = load_model(model_entry.model_params, target_device=target_device)

    config_path = GNA_CONFIG_PATH if target_device == 'GNA' else CPU_CONFIG_PATH
    hardware_config = HardwareConfig.from_json(config_path.as_posix())

    transformer = GraphTransformer(hardware_config)
    fq_model = transformer.insert_fake_quantize(loaded_model)
    check_model(tmp_path, fq_model, model_name, model_framework)
def cut_fq_node(model, node_list, graph_transformer, tmp_path):
    """Insert FakeQuantize nodes, remove selected ones, and check the result.

    :param model: object exposing ``model_params``, ``model_name`` and
        ``framework`` attributes
    :param node_list: names of nodes whose FakeQuantize input should be removed
    :param graph_transformer: object providing ``insert_fake_quantize`` and
        ``remove_fq_nodes``
    :param tmp_path: directory passed through to ``check_model``

    For each listed node, only the first input whose type is
    ``'FakeQuantize'`` is removed. The final model is checked under the name
    ``<model.model_name>_cut_fq``.
    """
    model_ = load_model(model.model_params)
    quantized_model = graph_transformer.insert_fake_quantize(model_)
    cropped_model = quantized_model

    for node_name in node_list:
        node = get_node_by_name(cropped_model, node_name)
        for parent_node in nu.get_node_inputs(node):
            # Skip falsy entries before touching `.type`.
            if parent_node and parent_node.type == 'FakeQuantize':
                # NOTE(review): removal is always invoked on `quantized_model`,
                # not on `cropped_model`; presumably `remove_fq_nodes` edits
                # the model in place so both names refer to the same graph --
                # confirm against the GraphTransformer implementation.
                cropped_model, *_ = graph_transformer.remove_fq_nodes(
                    quantized_model, [parent_node.name]
                )
                break

    check_model(tmp_path, cropped_model, model.model_name + '_cut_fq', model.framework)
def test_build_quantization_graph_with_ignored_params(
        tmp_path, models, model_name, model_framework):
    """Insert FakeQuantize nodes while honouring ``ignored_params``.

    Models listed in ``CASCADE_MAP`` are fetched through
    ``models.get_cascade``; all others through ``models.get``. Non-cascade
    models start from a default ``ignored_params`` with two ignored
    operations, and ``resnet_example`` / ``googlenet_example`` additionally
    get a ``scope`` list. For ``mtcnn`` the parameters are replaced by a
    per-sub-model mapping (``pnet``, ``rnet``, ``onet``).

    The number of FakeQuantize nodes is printed and the resulting model is
    checked under the name ``<model_name>_ig_params``.
    """
    is_cascade = model_name in CASCADE_MAP
    if is_cascade:
        model_entry = models.get_cascade(
            model_name, model_framework, tmp_path, CASCADE_MAP[model_name]
        )
    else:
        model_entry = models.get(model_name, model_framework, tmp_path)
    loaded_model = load_model(model_entry.model_params)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())

    if not is_cascade:
        ignored_add = {'type': 'Add'}
        ignored_convolution = {
            'type': 'Convolution',
            'attributes': {'output': 1280, 'group': 1},
        }
        ignored_params = {'operations': [ignored_add, ignored_convolution]}

    # Per-model scopes for the models that only extend the default parameters.
    scope_by_model = {
        'resnet_example': ['Conv_11/WithoutBiases', 'Conv_29/WithoutBiases'],
        'googlenet_example': ['Conv_10/WithoutBiases'],
    }
    if model_name in scope_by_model:
        ignored_params['scope'] = scope_by_model[model_name]
    elif model_name == 'mtcnn':
        ignored_params = {
            'pnet': {'scope': ['conv1/WithoutBiases', 'conv3/WithoutBiases']},
            'rnet': {'skip_model': True},
            'onet': {'operations': [{'type': 'MatMul'}]},
        }

    transformer = GraphTransformer(hardware_config)
    quantization_model = transformer.insert_fake_quantize(loaded_model, ignored_params)

    fq_nodes = get_nodes_by_type(quantization_model, ['FakeQuantize'])
    print(len(fq_nodes))
    check_model(
        tmp_path, quantization_model, f'{model_name}_ig_params', model_framework
    )
def test_statistics_collector_subsets(
        tmp_path, models, model_name, model_framework, quantization_mode,
        inplace_statistics, algorithm, preset, granularity, add_output_nodes,
        type_max, type_min):
    """Check the graph produced by inserting statistic nodes for one algorithm.

    The algorithm is instantiated from the config returned by ``create_``,
    its ``inplace_statistics`` config entry is overridden, and it registers
    its statistics on the collector. The merged statistics layout is then
    inserted into the model with ``StatisticGraphBuilder``. The resulting
    model is checked against a reference, and the number of distinct node
    names returned by the builder must equal ``add_output_nodes``.
    """
    model, engine, collector, algo_config = create_(
        tmp_path, models, model_name, model_framework, quantization_mode,
        algorithm.name, preset, granularity, type_max, type_min,
    )

    algo = algorithm(algo_config, engine)
    algo._config['inplace_statistics'] = inplace_statistics
    algo.register_statistics(model, collector)

    graph_builder = StatisticGraphBuilder()
    act_stats_layout, stat_aliases = merge_stats_by_algo_names(
        [algorithm.name], collector._layout_by_algo
    )
    model_with_nodes, nodes_names, _unused = graph_builder.insert_statistic(
        model, act_stats_layout, stat_aliases
    )

    # The reference name carries the min/max types only when `type_min` is set.
    if type_min is not None:
        ir_name = f'{model_name}_stat_{type_max}_{type_min}'
    else:
        ir_name = f'{model_name}_stat_mean'

    check_model(tmp_path, model_with_nodes, ir_name, model_framework)
    unique_node_names = set(nodes_names)
    assert len(unique_node_names) == add_output_nodes
def test_outlier_channel_splitting_algo(models, tmp_path, weights_expansion_ratio):
    """Run ``OutlierChannelSplitting`` on the test model and check the result.

    The algorithm is configured with the given ``weights_expansion_ratio``
    and constructed with ``None`` as its second argument. After
    ``algorithm.run(model)`` the same ``model`` object is passed to
    ``check_model`` with ``check_weights=True`` under the name
    ``<TEST_MODEL_NAME>_<weights_expansion_ratio>``.
    """
    ocs_config = Dict({'weights_expansion_ratio': weights_expansion_ratio})

    model_entry = models.get(TEST_MODEL_NAME, TEST_MODEL_FRAMEWORK, tmp_path)
    model = load_model(model_entry.model_params)

    ocs_algorithm = OutlierChannelSplitting(ocs_config, None)
    ocs_algorithm.run(model)

    reference_name = TEST_MODEL_NAME + f'_{weights_expansion_ratio}'
    check_model(
        tmp_path,
        model,
        reference_name,
        TEST_MODEL_FRAMEWORK,
        check_weights=True,
    )