Example #1
    def is_able_to_wrap(node):
        if node.type not in ['Convolution', 'MatMul', 'GroupConvolution']:
            return False

        node_weight = nu.get_node_input(node, 1)
        if node_weight.type == 'FakeQuantize':
            node_weight = nu.get_node_input(node_weight, 0)
        if node_weight.type != 'Const':
            return False

        if node.type != 'MatMul':

            weights = nu.get_node_value(node_weight)
            if len(weights.shape) != 4:
                return False

            s = node.stride
            stride_check = (s[2] == s[3])

            d = node.dilation
            dilation_check = (d[2] == d[3])

            if not dilation_check or not stride_check:
                return False

        bias_node = nu.get_bias_for_node(node)
        if bias_node is not None:
            bias_value = nu.get_node_value(bias_node)
            if bias_value.shape[0] != 1:
                return False
        return True
Example #2
def get_weight_node(node, port_id=1):
    node_weight = nu.get_node_input(node, port_id)
    if node_weight.type == 'FakeQuantize':
        node_weight = nu.get_node_input(node_weight, 0)
    if node_weight.type != 'Const':
        raise ValueError('Provided weight node is not Const!')
    return node_weight
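A minimal, self-contained sketch of the traversal get_weight_node performs, using hypothetical mock nodes instead of the real nu module: if the weight input on port 1 is a FakeQuantize, the lookup steps through it to reach the underlying Const.

from types import SimpleNamespace

# Hypothetical mock graph: Const -> FakeQuantize -> Convolution (weight on port 1).
const = SimpleNamespace(type='Const', name='conv/weights')
fq = SimpleNamespace(type='FakeQuantize', name='conv/weights/fq', inputs=[const])
conv = SimpleNamespace(type='Convolution', name='conv', inputs=[None, fq])

def get_node_input(node, port_id):
    # Stand-in for nu.get_node_input in this sketch.
    return node.inputs[port_id]

node_weight = get_node_input(conv, 1)
if node_weight.type == 'FakeQuantize':        # quantized weights: look one hop further
    node_weight = get_node_input(node_weight, 0)
assert node_weight.type == 'Const'
print(node_weight.name)                       # conv/weights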
Example #3
def build_graph_for_node(model, input_name, input_shape, node, remove_bias=False, remove_fake_quantize=False):
    """ Build the Graph (input - node - output). The Convolution, FullyConnected node types are supported.
     :param model: source model
     :param input_name: name of the input node in the generated graph
     :param input_shape: shape of the input node in the generated graph
     :param node: node for which graph (input - node - output) will be generated
     :param remove_bias: remove bias in the generated graph
     :param remove_fake_quantize: remove fake quantize nodes in the generated graph
     :return: generated graph.
    """
    input_data_type = get_node_data_type(node, 0)
    nodes, edges = [], []
    nodes.append((input_name, 'Parameter', {'name': input_name, 'shape': input_shape,
                                            'type': 'Parameter', 'data_type': input_data_type}))

    node_attrs = deepcopy(node.attrs())
    if node.has_valid('output') and node.has_valid('get_output_feature_dim'):
        node_attrs['get_output_feature_dim'] = None

    nodes.append((node.name, node.type, node_attrs))
    edges.append((input_name, node.name, {'out': 0, 'in': 0}))

    parent_nodes = get_node_inputs(node)
    if parent_nodes[1].type == 'FakeQuantize' and not remove_fake_quantize:
        fq = parent_nodes[1]
        fq_name = make_copy_fake_quantize(nodes, edges, fq)
        edges.append((fq_name, node.name, {'out': 0, 'in': 1}))
    else:
        weights = parent_nodes[1]
        nodes.append((weights.name, weights.type, {'value': weights.value.copy()}))
        edges.append((weights.name, node.name, {'out': 0, 'in': 1}))

    if not remove_bias:
        if parent_nodes[2].type == 'FakeQuantize' and not remove_fake_quantize:
            fq = parent_nodes[2]  # the bias input is on port 2
            fq_name = make_copy_fake_quantize(nodes, edges, fq)
            edges.append((fq_name, node.name, {'out': 0, 'in': 2}))
        else:
            weights = parent_nodes[2]
            nodes.append((weights.name, weights.type, {'value': weights.value.copy()}))
            edges.append((weights.name, node.name, {'out': 0, 'in': 2}))

    result_name = '{}/out'.format(node.name)
    nodes.append((result_name, 'Result', {}))
    edges.append((node.name, result_name, {'out': 0, 'in': 0}))
    graph = build_graph(*make_copy_graph_attrs(model, input_name, input_shape), nodes, edges)
    graph.ir_v10 = True

    # Add the necessary attribute to the new graph
    src_node = get_node_by_name(graph, node.name)
    weights_node = get_node_input(src_node, 1)
    weights_node = get_node_input(weights_node, 0) \
        if weights_node.type == 'FakeQuantize' else weights_node
    weights_out_dtype = weights_node.out_port(0).get_data_type()
    src_out_dtype = src_node.out_port(0).get_data_type()
    if weights_out_dtype != src_out_dtype:
        weights_node.out_node(0)['Insert_Convert_operation_after'] = True

    return graph
Example #4
File: layers.py Project: KodiaqQ/openvino
    def __init__(self, node, device='cpu', asymmetric=False):
        super(FakeQuantize, self).__init__()
        self.node = node
        self.device = device
        input_0 = nu.get_node_input(self.node, 0)
        self.is_weight_fq = input_0.type == 'Const'
        self.asymmetric = asymmetric

        min_val = nu.get_node_value(nu.get_node_input(self.node, 1))
        max_val = nu.get_node_value(nu.get_node_input(self.node, 2))
        min_val = np.array(min_val, dtype=np.float32)
        self.min = torch.tensor(min_val).to(self.device)
        self.min = torch.nn.Parameter(self.min) if self.asymmetric else self.min

        ranges = np.array(max_val - min_val, dtype=np.float32)
        self.scale = torch.tensor(ranges).log()
        self.scale = self.scale.to(self.device)
        self.scale = torch.nn.Parameter(self.scale)

        self.val_h = int(self.node.levels - 1)
        self.val_l = 0
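The constructor only stores min, the logarithm of the (max - min) range, and the integer grid [val_l, val_h]. Below is a minimal sketch of the standard fake-quantization arithmetic these parameters feed into (a hypothetical standalone helper, not necessarily the class's actual forward pass): values are snapped onto levels - 1 uniform steps inside the range and mapped back to floats.

import torch

def fake_quantize_sketch(x, min_val, log_range, levels=256):
    # Recover the float range from its stored logarithm, split it into
    # (levels - 1) uniform steps, round onto that grid, and map back.
    val_l, val_h = 0, levels - 1
    step = torch.exp(log_range) / val_h
    q = torch.clamp(torch.round((x - min_val) / step), val_l, val_h)
    return q * step + min_val

x = torch.linspace(-1.5, 1.5, steps=7)
out = fake_quantize_sketch(x, min_val=torch.tensor(-1.0),
                           log_range=torch.log(torch.tensor(2.0)))
print(out)   # values outside [-1.0, 1.0] are clipped to the range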
Example #5
    def get_parameter_meta(self, model, optimizer_state):
        param_grid = []
        if 'range_estimator' in self._config.tuning_scope:
            for variable in self._config.estimator_tuning_scope:
                self._config.tuning_scope.append('estimator_' + variable)
        config = deepcopy(self._config)
        if optimizer_state['first_iteration'] or optimizer_state[
                'fully_quantized']:
            config['tuning_scope'] = []

        hardware_config = load_hardware_config(config)
        model = deepcopy(model)
        fqut.insert_fake_quantize_nodes(config, model)
        fq_configuration = read_all_fake_quantize_configurations(
            config, hardware_config, model)

        nodes_config = {}
        for fq in get_nodes_by_type(model, ['FakeQuantize']):
            node_input = get_node_input(fq, 0)
            op_type = 'weights' if node_input.type == 'Const' else 'activations'
            fq_node_config = fq_configuration[fq.name][op_type]
            for child_name, child_config in fq_node_config:
                if child_name not in nodes_config:
                    nodes_config[child_name] = {
                        'weights': [],
                        'activations': []
                    }
                nodes_config[child_name][op_type].extend(child_config)

        for node_name, node_config in nodes_config.items():
            if 'activations' in node_config:
                node_config['activations'] = ut.append_estimator_configs(
                    node_config['activations'], False, config,
                    self.params[node_name]
                    if not optimizer_state['fully_quantized']
                    and node_name in self.params else None)
            if 'weights' in node_config:
                node_config['weights'] = ut.append_estimator_configs(
                    node_config['weights'], True, config,
                    self.params[node_name]
                    if not optimizer_state['fully_quantized']
                    and node_name in self.params else None)

        for node_name, node_config in nodes_config.items():
            op_config = ut.get_quantize_op_config(
                node_config, config, self.params[node_name]
                if not optimizer_state['fully_quantized']
                and node_name in self.params else None)
            param_grid.append((node_name, 'choice', op_config))
        return param_grid
Example #6
    def __init__(self,
                 node,
                 input_fq=None,
                 wrap_weight_fq=False,
                 device='cpu',
                 set_quantized_values_to_weight_parameter=False,
                 asymmetric=False):
        super().__init__()

        self.node = node
        self.device = device

        self.set_quantized_values_to_weight_parameter = set_quantized_values_to_weight_parameter
        self.weight_fq, self.input_fq = None, input_fq

        if wrap_weight_fq:
            weight_fq = nu.get_node_input(self.node, 1)
            weight_fq_wrapper = FakeQuantize
            if not weight_fq_wrapper.is_able_to_wrap(weight_fq):
                logger.warning('Was not able to wrap layer %s with pytorch',
                               weight_fq.name)
                self.weight_fq = None
            else:
                self.weight_fq = weight_fq_wrapper(weight_fq,
                                                   device=device,
                                                   asymmetric=asymmetric)

        node_weight = get_weight_node(node)
        weights = nu.get_node_value(node_weight)
        self.weights_dtype = weights.dtype
        weights = torch.from_numpy(weights).to(torch.float32)
        weights = weights.to(device)
        self.weights = torch.nn.Parameter(weights)

        self.bias = None
        bias_node = nu.get_bias_for_node(self.node)
        if bias_node is not None:
            bias = nu.get_node_value(bias_node)
            self.bias_dtype = bias.dtype
            bias = torch.from_numpy(bias).to(torch.float32).squeeze()
            bias = bias if bias.shape else bias.reshape(1)
            bias = bias.to(device)
            self.bias = torch.nn.Parameter(bias)

        if self.node.type != 'MatMul':
            self.stride = (int(node.stride[2]), int(node.stride[3]))
            self.pads_begin, self.pads_end = node.pad[2], node.pad[3]
            self.dilation = (int(node.dilation[2]), int(node.dilation[3]))
            self.group = 1 if 'group' not in node else int(node.group)
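For non-MatMul (convolution) nodes the wrapper keeps stride, padding, dilation and group so the layer can later be executed with PyTorch. A minimal sketch, with hypothetical attribute values, of how these attributes map onto torch.nn.functional.conv2d:

import torch
import torch.nn.functional as F

# Hypothetical values as they could be extracted from a Convolution node.
stride, dilation, group = (1, 1), (1, 1), 1
padding = (1, 1)                      # symmetric pads; asymmetric ones need F.pad first

x = torch.randn(1, 8, 16, 16)         # NCHW activation
weights = torch.randn(16, 8, 3, 3)    # OIHW weights, as stored in the Const node
bias = torch.zeros(16)

out = F.conv2d(x, weights, bias, stride=stride, padding=padding,
               dilation=dilation, groups=group)
print(out.shape)                      # torch.Size([1, 16, 16, 16])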
Example #7
def check_model_sparsity_level(model,
                               sparsity_ignored_scope,
                               target_sparsity_level,
                               strict=False,
                               count_ignored_nodes=True):
    """
    Check if the tuned model has the same sparsity level as set in the config.
    :param model: NetworkX model
    :param sparsity_ignored_scope: list of layers ignored during sparsification: list
    :param target_sparsity_level: desired sparsity level of the model: float
    :param strict: whether to raise an error if actual sparsity does not equal target: bool
    :param count_ignored_nodes: whether to include non-sparsified nodes when considering total weight count: bool
    """
    perlayer_weight_sizes = []
    perlayer_sparsity_rates = []
    all_nodes_with_weights = get_nodes_by_type(
        model, [op['type'] for op in OPERATIONS_WITH_WEIGHTS])
    all_nodes_with_weights = [
        n for n in all_nodes_with_weights
        if nu.get_node_input(n, 1).type == 'Const'
    ]
    if sparsity_ignored_scope is not None and not count_ignored_nodes:
        all_nodes_with_weights = [
            node for node in all_nodes_with_weights
            if (node.name not in sparsity_ignored_scope)
        ]
    for node in all_nodes_with_weights:
        weight_node = nu.get_weights_for_node(node)
        if weight_node is not None:
            weight = nu.get_node_value(weight_node)
            perlayer_sparsity_rates.append(np.sum(weight == 0) / weight.size)
            perlayer_weight_sizes.append(weight.size)

    logger.debug('Per-layer sparsity levels: %s', perlayer_sparsity_rates)
    logger.debug('Per-layer weight sizes %s', perlayer_weight_sizes)

    global_sparsity_rate = np.dot(
        perlayer_sparsity_rates,
        perlayer_weight_sizes) / np.sum(perlayer_weight_sizes)
    logger.info('Sparsity rate after tuning: %s', global_sparsity_rate)
    if strict and not np.isclose(
            global_sparsity_rate, target_sparsity_level, atol=1e-2):
        raise RuntimeError('Target sparsity level {} was '
                           'not reached for the model: {}'.format(
                               target_sparsity_level, global_sparsity_rate))
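The global rate computed above is the per-layer zero ratio weighted by each layer's weight count, so large layers dominate the result. A small self-contained sketch with made-up numbers:

import numpy as np

# Hypothetical per-layer statistics: zero ratio and number of weights per layer.
perlayer_sparsity_rates = np.array([0.5, 0.3, 0.9])
perlayer_weight_sizes = np.array([1000, 4000, 500])

global_sparsity_rate = np.dot(perlayer_sparsity_rates,
                              perlayer_weight_sizes) / np.sum(perlayer_weight_sizes)
print(global_sparsity_rate)   # ~0.391, pulled toward the 4000-weight layer's 0.3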
Example #8
    def get_parameter_meta(self, model):
        param_grid = []
        config = deepcopy(self._config)

        hardware_config = load_hardware_config(config)
        model = deepcopy(model)
        fqut.insert_fake_quantize_nodes(config, model)
        fq_configuration = read_all_fake_quantize_configurations(
            config, hardware_config, model)

        nodes_config = {}
        for fq in get_nodes_by_type(model, ['FakeQuantize']):
            node_input = get_node_input(fq, 0)
            op_type = 'weights' if node_input.type == 'Const' else 'activations'
            fq_node_config = fq_configuration[fq.fullname][op_type]
            for child_name, child_config in fq_node_config:
                if child_name not in nodes_config:
                    nodes_config[child_name] = {
                        'weights': [],
                        'activations': []
                    }
                nodes_config[child_name][op_type].extend(child_config)

        for node_name, node_config in nodes_config.items():
            if 'activations' in node_config:
                node_config['activations'] = ut.append_estimator_configs(
                    node_config['activations'], False, config,
                    self.params[node_name]
                    if node_name in self.params else None)
            if 'weights' in node_config:
                node_config['weights'] = ut.append_estimator_configs(
                    node_config['weights'], True, config,
                    self.params[node_name]
                    if node_name in self.params else None)

        for node_name, node_config in nodes_config.items():
            op_config = ut.get_quantize_op_config(
                node_config, config,
                self.params[node_name] if node_name in self.params else None)
            param_grid.append((node_name, 'choice', op_config))
        return param_grid
Example #9
def test_fake_quantize_configurations(tmp_path, models, model_name,
                                      model_framework, algo_mode):
    test_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            './data/reference_scale/test_data')

    config = _get_pytorch_accuracy_checker_config(test_dir) \
        if model_framework == 'pytorch' else _get_tf_accuracy_checker_config(test_dir)

    if algo_mode == 'symmetric':
        activations_mode, weights_mode, level_low = 'symmetric', 'symmetric', -127
    elif algo_mode == 'asymmetric':
        activations_mode, weights_mode, level_low = 'asymmetric', 'asymmetric', -128
    else:
        activations_mode, weights_mode, level_low = 'asymmetric', 'symmetric', -127

    compression_config = Dict({
        'name': 'MinMaxQuantization',
        'stat_subset_size': 1,
        'preset': 'performance',
        'target_device': 'CPU',
        'activations': {
            'bits': 8,
            'mode': activations_mode
        },
        'weights': {
            'bits': 8,
            'mode': weights_mode,
            'granularity': 'perchannel',
            'level_low': level_low,
            'level_high': 127
        }
    })

    def _make_list(x):
        if isinstance(x, np.ndarray):
            x = x.tolist()
        if isinstance(x, list):
            return x
        return [x]

    engine = ACEngine(config)
    compression_config.subset_indices = [0]
    algo = COMPRESSION_ALGORITHMS.get('MinMaxQuantization')(compression_config,
                                                            engine)
    model = models.get(model_name, model_framework, tmp_path)
    model = load_model(model.model_params)

    stats_collector = StatisticsCollector(engine)
    algo.register_statistics(model, stats_collector)
    stats_collector.compute_statistics(model)

    model = algo.run(model)

    refs_path = os.path.join(REFERENCES_DIR,
                             '{}_{}.json'.format(model_name, algo_mode))
    local_path = os.path.join(tmp_path, '{}.json'.format(model_name))

    ref_exists = os.path.isfile(refs_path)

    refs = load_refs(refs_path) if ref_exists else {}
    ref_file = None if ref_exists else open(refs_path, 'w')
    local_file = open(local_path, 'w')
    model_values = {}

    eps = 1e-3
    fq_list = mu.get_nodes_by_type(model, ['FakeQuantize'])
    for fq in sorted(fq_list, key=lambda item: item.name):
        min_levels, max_levels = tuple(
            [get_node_value(node) for node in get_node_inputs(fq)[1:3]])
        fq_name = fq.name
        if get_node_input(fq, 0).type == 'Const':
            min_levels = min_levels.reshape(min_levels.shape[0])
            max_levels = max_levels.reshape(max_levels.shape[0])
        elif min_levels.shape or max_levels.shape:
            min_levels = min_levels.reshape(min_levels.shape[1])
            max_levels = max_levels.reshape(max_levels.shape[1])

        min_levels = _make_list(min_levels)
        max_levels = _make_list(max_levels)
        model_values[fq_name] = {'max': max_levels, 'min': min_levels}

    if not ref_exists:
        json.dump(model_values, ref_file)
        return
    json.dump(model_values, local_file)

    for ref_name in refs:
        refs_min_levels = _make_list(refs[ref_name]['min'])
        refs_max_levels = _make_list(refs[ref_name]['max'])
        min_levels = model_values[ref_name]['min']
        max_levels = model_values[ref_name]['max']

        for min_level, max_level, ref_min, ref_max in zip(
                min_levels, max_levels, refs_min_levels, refs_max_levels):
            assert abs(min_level - ref_min) < eps
            assert abs(max_level - ref_max) < eps