def test_lstm_ends(tmp_path, models):
    """
    Check the LSTM ends found for the ReadValue nodes of the first model
    listed in MODELS_WITH_LSTM.

    Every ReadValue node must be present in the reference mapping, and the
    names of the nodes returned by nu.get_lstm_ends must equal the reference
    names for that node (compared order-insensitively).
    """
    name, framework, expected_ends = MODELS_WITH_LSTM[0]
    graph = load_model(models.get(name, framework, tmp_path).model_params)

    read_value_nodes = get_nodes_by_type(graph, ['ReadValue'])
    assign_nodes = get_nodes_by_type(graph, ['Assign'])

    for rv_node in read_value_nodes:
        assert rv_node.name in expected_ends
        found_names = [end.name for end in nu.get_lstm_ends(rv_node, assign_nodes, [])]
        assert sorted(found_names) == sorted(expected_ends[rv_node.name])
def test_generate_image(tmp_path, models, model_name, model_framework, layout, input_shape):
    """
    Check the data loader built from a 'data_free' engine config.

    The number of items yielded by the loader, the number of entries in the
    data source directory and the requested subset size must all be equal,
    and the first item must have the expected shape.
    """
    subset_size = 5
    dataset_dir = os.path.join(tmp_path, 'pot_dataset')
    engine_config = Dict({
        'device': 'CPU',
        'type': 'data_free',
        'data_source': dataset_dir,
        'subset_size': subset_size,
        'layout': layout,
        'shape': input_shape,
        'generate_data': 'True'
    })

    graph = load_model(models.get(model_name, model_framework, tmp_path).model_params)
    loader = create_data_loader(engine_config, graph)

    loaded_count = len(list(loader))
    stored_count = len(os.listdir(dataset_dir))
    assert loaded_count == stored_count == subset_size

    first_image = loader[0]
    expected_shape = input_shape
    if expected_shape is None:
        # No explicit shape: take it from the first Parameter node, dropping
        # the leading dimension.
        parameter = get_nodes_by_type(graph, ['Parameter'], recursively=False)[0]
        expected_shape = tuple(parameter.shape[1:])
    elif len(expected_shape) == 4:
        # 4-element shape: drop the leading dimension.
        expected_shape = expected_shape[1:]
    assert first_image.shape == expected_shape
def test_multibranch_propagation_with_fq_moving():
    """
    Insert FakeQuantize nodes into the 'test_ig_border_case_with_fq_moving'
    reference model with four scoped nodes ignored, then check the result.

    Expected: the first two inputs of nodes '14' and '12' are FakeQuantize,
    the third input of node '14' is Concat, and the model contains exactly
    six FakeQuantize nodes.
    """
    refs_dir = TEST_ROOT / 'data' / 'test_cases_refs'
    config = Dict({
        'model': (refs_dir / 'test_ig_border_case_with_fq_moving.xml').as_posix(),
        'weights': (refs_dir / 'test_ig_border_case_with_fq_moving.bin').as_posix()
    })
    ignored_params = {
        "scope": [
            '8/WithoutBiases', '9/WithoutBiases',
            '10/WithoutBiases', '11/WithoutBiases'
        ]
    }

    model = load_model(config)
    hardware_config = HardwareConfig.from_json((HARDWARE_CONFIG_PATH / 'cpu.json').as_posix())
    transformer = GraphTransformer(hardware_config)
    quantized_model = transformer.insert_fake_quantize(model, ignored_params)

    node_14 = get_node_by_name(quantized_model, '14')
    for producer in get_node_inputs(node_14)[:2]:
        assert producer.type == 'FakeQuantize'
    assert get_node_inputs(node_14)[2].type == 'Concat'

    node_12 = get_node_by_name(quantized_model, '12')
    for producer in get_node_inputs(node_12)[:2]:
        assert producer.type == 'FakeQuantize'

    fq_nodes = get_nodes_by_type(quantized_model, ['FakeQuantize'])
    assert len(fq_nodes) == 6
def create_data_loader(config, model):
    """
    Factory to create a data loader instance based on the engine config
    :param config: engine config section from toolkit config file
    :param model: CompressedModel instance to find out input shape
    :return: ImageLoader instance whose shape and layout are taken from the
             first model input that is not of shape (1, 3)
    :raises RuntimeError: if the model has several inputs and none of them has
                          shape (1, 3), or if every input has shape (1, 3)
    """
    inputs = get_nodes_by_type(model, ['Parameter'], recursively=False)

    # An input of shape (1, 3) is treated as the auxiliary image info input;
    # every other input is a candidate image input.
    image_inputs = [node for node in inputs if tuple(node.shape) != (1, 3)]

    if len(inputs) > 1 and len(image_inputs) == len(inputs):
        raise RuntimeError('IEEngine supports networks with single input or net with 2 inputs. '
                           'In second case there are image input and image info input '
                           'Actual inputs number: {}'.format(len(inputs)))

    if not image_inputs:
        raise RuntimeError('There is no node with image input')

    # The loader is configured from the first image input only.
    in_node = image_inputs[0]
    data_loader = ImageLoader(config)
    data_loader.shape = in_node.shape
    data_loader.get_layout(in_node)
    return data_loader
def test_first_convolutions_search(tmp_path, models, model_name, model_framework, first_convs_ref):
    """
    Check that get_first_convolutions, started from the model's Parameter
    nodes, returns nodes whose names match first_convs_ref
    (compared order-insensitively).
    """
    graph = load_model(models.get(model_name, model_framework, tmp_path).model_params)
    parameters = get_nodes_by_type(graph, ['Parameter'])
    found_names = [conv.name for conv in get_first_convolutions(parameters)]
    assert sorted(found_names) == sorted(first_convs_ref)
def get_fq_nodes_stats_algo(model, preset, bits, is_weights, clipping_value=None):
    """
    Run MinMaxQuantization on the model and collect FakeQuantize low/high levels.

    :param model: object exposing model_params accepted by load_model
    :param preset: quantization preset; 'performance' selects symmetric
                   weights, anything else asymmetric
    :param bits: number of bits used for both activations and weights
    :param is_weights: if true, collect levels of FakeQuantize nodes fed by a
                       Const node; otherwise collect levels of the other ones
    :param clipping_value: value passed to the activations 'max' range estimator
    :return: dict mapping FakeQuantize node name to
             {'low_level': ..., 'high_level': ...}
    """
    this_dir = os.path.dirname(os.path.realpath(__file__))
    test_dir = os.path.join(this_dir, './data/reference_scale/test_data')
    engine = ACEngine(_get_pytorch_accuracy_checker_config(test_dir))

    weights_mode = 'symmetric' if preset == 'performance' else 'asymmetric'
    compression_config = Dict({
        'name': 'MinMaxQuantization',
        'stat_subset_size': 1,
        'preset': preset,
        'target_device': 'CPU',
        'activations': {
            'bits': bits,
            'range_estimator': {
                'max': {
                    'clipping_value': clipping_value
                }
            }
        },
        'weights': {
            'bits': bits,
            'mode': weights_mode
        }
    })
    compression_config.subset_indices = [0]

    algo_cls = COMPRESSION_ALGORITHMS.get('MinMaxQuantization')
    algo = algo_cls(compression_config, engine)

    model = load_model(model.model_params)
    collector = StatisticsCollector(engine)
    algo.register_statistics(model, collector)
    collector.compute_statistics(model)
    model = algo.run(model)

    levels = {}
    for fq_node in mu.get_nodes_by_type(model, ['FakeQuantize']):
        fq_inputs = get_node_inputs(fq_node)
        fed_by_const = fq_inputs[0].type == 'Const'
        # Keep weight FQs when weights are requested, activation FQs otherwise.
        if bool(is_weights) != bool(fed_by_const):
            continue

        low = fq_inputs[1].value
        high = fq_inputs[2].value
        if fed_by_const:
            # Weights: flatten to the size of the first dimension.
            low = np.reshape(low, low.shape[0])
            high = np.reshape(high, high.shape[0])
        elif low.shape:
            # Non-scalar activations: flatten to the size of the second dimension.
            low = np.reshape(low, low.shape[1])
            high = np.reshape(high, high.shape[1])
        levels[fq_node.name] = {'low_level': low, 'high_level': high}
    return levels
def test_build_quantization_graph_with_ignored_params(tmp_path, models, model_name, model_framework):
    """
    Insert FakeQuantize nodes with model-specific ignored parameters and
    compare the resulting graph through check_model.

    Models listed in CASCADE_MAP are loaded as cascades; all other models get
    a common set of ignored operations, optionally extended with a 'scope'.
    """
    is_cascade = model_name in CASCADE_MAP
    if is_cascade:
        model = models.get_cascade(model_name, model_framework, tmp_path,
                                   CASCADE_MAP[model_name])
    else:
        model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)
    hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())

    if not is_cascade:
        ignored_params = {
            'operations': [
                {'type': 'Add'},
                {'type': 'Convolution', 'attributes': {'output': 1280, 'group': 1}}
            ]
        }

    # Per-model adjustments of the ignored parameters.
    if model_name == 'resnet_example':
        ignored_params['scope'] = ['Conv_11/WithoutBiases', 'Conv_29/WithoutBiases']
    elif model_name == 'googlenet_example':
        ignored_params['scope'] = ['Conv_10/WithoutBiases']
    elif model_name == 'mtcnn':
        ignored_params = {
            'pnet': {'scope': ['conv1/WithoutBiases', 'conv3/WithoutBiases']},
            'rnet': {'skip_model': True},
            'onet': {'operations': [{'type': 'MatMul'}]}
        }

    transformer = GraphTransformer(hardware_config)
    quantization_model = transformer.insert_fake_quantize(model, ignored_params)

    fq_count = len(get_nodes_by_type(quantization_model, ['FakeQuantize']))
    print(fq_count)
    check_model(tmp_path, quantization_model, model_name + '_ig_params', model_framework)
def get_parameter_meta(self, model, optimizer_state):
    """
    Build the parameter grid describing quantization choices per node
    :param model: model to analyse; a deep copy is used, the argument itself
                  is not modified by this method
    :param optimizer_state: mapping with the 'first_iteration' and
                            'fully_quantized' flags
    :return: list of (node_name, 'choice', op_config) tuples
    """
    if 'range_estimator' in self._config.tuning_scope:
        for variable in self._config.estimator_tuning_scope:
            scope_name = 'estimator_' + variable
            # This mutates the shared config, so guard against adding the
            # same entry again on every call.
            if scope_name not in self._config.tuning_scope:
                self._config.tuning_scope.append(scope_name)

    config = deepcopy(self._config)
    if optimizer_state['first_iteration'] or optimizer_state['fully_quantized']:
        config['tuning_scope'] = []
    hardware_config = load_hardware_config(config)

    model = deepcopy(model)
    fqut.insert_fake_quantize_nodes(config, model)
    fq_configuration = read_all_fake_quantize_configurations(
        config, hardware_config, model)

    def _node_params(node_name):
        # Stored parameters are used only when the model is not fully
        # quantized and an entry exists for the node.
        if optimizer_state['fully_quantized'] or node_name not in self.params:
            return None
        return self.params[node_name]

    # Group the FakeQuantize configurations by the node they apply to.
    nodes_config = {}
    for fq in get_nodes_by_type(model, ['FakeQuantize']):
        node_input = get_node_input(fq, 0)
        op_type = 'weights' if node_input.type == 'Const' else 'activations'
        fq_node_config = fq_configuration[fq.name][op_type]
        for child_name, child_config in fq_node_config:
            if child_name not in nodes_config:
                nodes_config[child_name] = {'weights': [], 'activations': []}
            nodes_config[child_name][op_type].extend(child_config)

    for node_name, node_config in nodes_config.items():
        if 'activations' in node_config:
            node_config['activations'] = ut.append_estimator_configs(
                node_config['activations'], False, config, _node_params(node_name))
        if 'weights' in node_config:
            node_config['weights'] = ut.append_estimator_configs(
                node_config['weights'], True, config, _node_params(node_name))

    param_grid = []
    for node_name, node_config in nodes_config.items():
        op_config = ut.get_quantize_op_config(
            node_config, config, _node_params(node_name))
        param_grid.append((node_name, 'choice', op_config))
    return param_grid
def test_multibranch_propagation_without_fq_moving(tmp_path, models, model_name, model_framework):
    """
    Insert FakeQuantize nodes with four scoped nodes ignored, then check the
    result.

    Expected: the first two inputs of 'Convolution_201' are FakeQuantize and
    the model contains exactly two FakeQuantize nodes.
    """
    ignored_params = {
        "scope": ['Convolution_104', 'Convolution_152', 'Convolution_8', 'Convolution_56']
    }

    graph = load_model(models.get(model_name, model_framework, tmp_path).model_params)
    cpu_config_path = (HARDWARE_CONFIG_PATH / 'cpu.json').as_posix()
    transformer = GraphTransformer(HardwareConfig.from_json(cpu_config_path))
    quantized_model = transformer.insert_fake_quantize(graph, ignored_params)

    target = get_node_by_name(quantized_model, 'Convolution_201')
    for producer in get_node_inputs(target)[:2]:
        assert producer.type == 'FakeQuantize'

    fq_nodes = get_nodes_by_type(quantized_model, ['FakeQuantize'])
    assert len(fq_nodes) == 2
def check_model_sparsity_level(model,
                               sparsity_ignored_scope,
                               target_sparsity_level,
                               strict=False,
                               count_ignored_nodes=True):
    """
    Check if tuned model has the same sparsity level as set in the config
    :param model: model to inspect
    :param sparsity_ignored_scope: list of names of layers ignored during
                                   sparsification, or None
    :param target_sparsity_level: desired sparsity level of the model (float)
    :param strict: whether to raise an error if actual sparsity does not equal
                   target (compared with an absolute tolerance of 1e-2)
    :param count_ignored_nodes: whether to include non-sparsified nodes when
                                considering total weight count
    :raises RuntimeError: if strict is set and the target level is not reached
    """
    weight_op_types = [op['type'] for op in OPERATIONS_WITH_WEIGHTS]
    # Only nodes whose input 1 is a constant are considered.
    nodes_with_weights = [
        node for node in get_nodes_by_type(model, weight_op_types)
        if nu.get_node_input(node, 1).type == 'Const'
    ]

    if sparsity_ignored_scope is not None and not count_ignored_nodes:
        nodes_with_weights = [
            node for node in nodes_with_weights
            if node.name not in sparsity_ignored_scope
        ]

    perlayer_weight_sizes = []
    perlayer_sparsity_rates = []
    for node in nodes_with_weights:
        weight_node = nu.get_weights_for_node(node)
        if weight_node is None:
            continue
        weight = nu.get_node_value(weight_node)
        perlayer_sparsity_rates.append(np.sum(weight == 0) / weight.size)
        perlayer_weight_sizes.append(weight.size)

    logger.debug('Per-layer sparsity levels: %s', perlayer_sparsity_rates)
    logger.debug('Per-layer weight sizes %s', perlayer_weight_sizes)

    total_weight_size = np.sum(perlayer_weight_sizes)
    if total_weight_size > 0:
        # Size-weighted average of the per-layer sparsity rates.
        global_sparsity_rate = np.dot(
            perlayer_sparsity_rates, perlayer_weight_sizes) / total_weight_size
    else:
        # No weights were collected: report zero sparsity instead of
        # dividing by zero (which would produce NaN and a NumPy warning).
        global_sparsity_rate = 0.0
    logger.info('Sparsity rate after tuning: %s', global_sparsity_rate)

    if strict and not np.isclose(
            global_sparsity_rate, target_sparsity_level, atol=1e-2):
        raise RuntimeError('Target sparisty level {} was '
                           'not reached for the model: {}'.format(
                               target_sparsity_level, global_sparsity_rate))
def get_parameter_meta(self, model):
    """
    Build the parameter grid describing quantization choices per node
    :param model: model to analyse; a deep copy is used, the argument itself
                  is not modified by this method
    :return: list of (node_name, 'choice', op_config) tuples
    """
    config = deepcopy(self._config)
    hardware_config = load_hardware_config(config)
    graph = deepcopy(model)
    fqut.insert_fake_quantize_nodes(config, graph)
    fq_configuration = read_all_fake_quantize_configurations(
        config, hardware_config, graph)

    def _stored_params(name):
        # Parameters stored for the node, or None when there are none.
        return self.params[name] if name in self.params else None

    # Group the FakeQuantize configurations by the node they apply to.
    per_node = {}
    for fq_node in get_nodes_by_type(graph, ['FakeQuantize']):
        if get_node_input(fq_node, 0).type == 'Const':
            kind = 'weights'
        else:
            kind = 'activations'
        for child_name, child_config in fq_configuration[fq_node.fullname][kind]:
            entry = per_node.setdefault(child_name, {'weights': [], 'activations': []})
            entry[kind].extend(child_config)

    for name, entry in per_node.items():
        if 'activations' in entry:
            entry['activations'] = ut.append_estimator_configs(
                entry['activations'], False, config, _stored_params(name))
        if 'weights' in entry:
            entry['weights'] = ut.append_estimator_configs(
                entry['weights'], True, config, _stored_params(name))

    grid = []
    for name, entry in per_node.items():
        choice = ut.get_quantize_op_config(entry, config, _stored_params(name))
        grid.append((name, 'choice', choice))
    return grid
def test_fake_quantize_configurations(tmp_path, models, model_name, model_framework, algo_mode):
    """
    Compare FakeQuantize min/max levels produced by MinMaxQuantization with
    stored references.

    If the reference file for (model_name, algo_mode) does not exist, it is
    created from the computed values and the test returns without comparing.
    Otherwise the computed values are dumped to tmp_path and every referenced
    level must match within an absolute tolerance of 1e-3.
    """
    test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            './data/reference_scale/test_data')
    config = _get_pytorch_accuracy_checker_config(test_dir) \
        if model_framework == 'pytorch' else _get_tf_accuracy_checker_config(test_dir)

    if algo_mode == 'symmetric':
        activations_mode, weights_mode, level_low = 'symmetric', 'symmetric', -127
    elif algo_mode == 'asymmetric':
        activations_mode, weights_mode, level_low = 'asymmetric', 'asymmetric', -128
    else:
        activations_mode, weights_mode, level_low = 'asymmetric', 'symmetric', -127

    compression_config = Dict({
        'name': 'MinMaxQuantization',
        'stat_subset_size': 1,
        'preset': 'performance',
        'target_device': 'CPU',
        'activations': {
            'bits': 8,
            'mode': activations_mode
        },
        'weights': {
            'bits': 8,
            'mode': weights_mode,
            'granularity': 'perchannel',
            'level_low': level_low,
            'level_high': 127
        }
    })

    def _make_list(x):
        # Normalise scalars and numpy arrays to plain Python lists.
        if isinstance(x, np.ndarray):
            x = x.tolist()
        if isinstance(x, list):
            return x
        return [x]

    engine = ACEngine(config)
    compression_config.subset_indices = [0]
    algo = COMPRESSION_ALGORITHMS.get('MinMaxQuantization')(compression_config, engine)
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    stats_collector = StatisticsCollector(engine)
    algo.register_statistics(model, stats_collector)
    stats_collector.compute_statistics(model)
    model = algo.run(model)

    refs_path = os.path.join(REFERENCES_DIR, '{}_{}.json'.format(model_name, algo_mode))
    local_path = os.path.join(tmp_path, '{}.json'.format(model_name))

    ref_exists = os.path.isfile(refs_path)
    refs = load_refs(refs_path) if ref_exists else {}

    model_values = {}
    eps = 1e-3
    fq_list = mu.get_nodes_by_type(model, ['FakeQuantize'])
    for fq in sorted(fq_list, key=lambda item: item.name):
        min_levels, max_levels = (
            get_node_value(node) for node in get_node_inputs(fq)[1:3])
        if get_node_input(fq, 0).type == 'Const':
            # Weights: flatten to the size of the first dimension.
            min_levels = min_levels.reshape(min_levels.shape[0])
            max_levels = max_levels.reshape(max_levels.shape[0])
        elif min_levels.shape or max_levels.shape:
            # Non-scalar activations: flatten to the size of the second dimension.
            min_levels = min_levels.reshape(min_levels.shape[1])
            max_levels = max_levels.reshape(max_levels.shape[1])
        model_values[fq.name] = {
            'max': _make_list(max_levels),
            'min': _make_list(min_levels)
        }

    if not ref_exists:
        # Open the reference file only once the values are ready, so a failure
        # above cannot leave an empty reference file behind.
        with open(refs_path, 'w') as ref_file:
            json.dump(model_values, ref_file)
        return

    with open(local_path, 'w') as local_file:
        json.dump(model_values, local_file)

    for ref_name in refs:
        refs_min_levels = _make_list(refs[ref_name]['min'])
        refs_max_levels = _make_list(refs[ref_name]['max'])
        min_levels = model_values[ref_name]['min']
        max_levels = model_values[ref_name]['max']

        for min_level, max_level, ref_min, ref_max in zip(
                min_levels, max_levels, refs_min_levels, refs_max_levels):
            assert abs(min_level - ref_min) < eps
            assert abs(max_level - ref_max) < eps