Example #1
0
    def test_shape_only(self):
        """Check the output shape produced by FakeQuantize.infer when only shapes are known.

        All five inputs of the 'quantize' node carry the shape [1, 3, 10, 20] and
        the output data node starts with no shape; after inference the output
        shape must be exactly [1, 3, 10, 20].
        """
        expected_shape = [1, 3, 10, 20]
        input_names = ['node_in_{}'.format(idx) for idx in range(1, 6)]

        edges = [(input_name, 'quantize') for input_name in input_names]
        edges += [('quantize', 'node_out_1'), ('node_out_1', 'op_output')]

        attrs_update = {'node_out_1': {'shape': None}}
        for input_name in input_names:
            # every input gets its own array instance, as in a hand-written fixture
            attrs_update[input_name] = {'shape': np.array(expected_shape)}

        graph = build_graph(nodes_attributes, edges, attrs_update)

        quantize_node = Node(graph, 'quantize')
        FakeQuantize.infer(quantize_node)

        res_shape = graph.node['node_out_1']['shape']
        # Fail with a clear message if inference did not set the shape at all.
        self.assertIsNotNone(res_shape)
        # Compare whole sequences so that an inferred shape with extra (or missing)
        # dimensions is reported instead of silently passing a prefix comparison.
        self.assertListEqual(expected_shape, list(res_shape))
    def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
        """Replace a matched quantize -> dequantize pair with a single FakeQuantize node.

        The pair is fused only when scale (input port 1) and zero point (input
        port 2) of both nodes are produced by 'Const' nodes; otherwise the graph
        is left untouched. The FakeQuantize limits are computed as
        ``scale * (q_min - zero_point)`` and ``scale * (q_max - zero_point)``,
        where ``[q_min, q_max]`` is [-128, 127] for an int8 zero point and
        [0, 255] for a uint8 zero point. The same limits are used for the input
        and the output range of the new node.

        :param graph: graph being transformed
        :param match: matched nodes, must contain 'quantize' and 'dequantize'
        :raises Error: if the zero point dtype is neither int8 nor uint8, or if
            quantize and dequantize use different scale / zero point values
        """

        def same_values(lhs, rhs):
            # Element-wise comparison reduced to a single bool. A plain ``lhs == rhs``
            # used in a boolean context raises "truth value of an array is ambiguous"
            # for arrays with more than one element. np.equal raises ValueError for
            # shapes that cannot be broadcast together; such values are not equal.
            try:
                return bool(np.all(np.equal(lhs, rhs)))
            except ValueError:
                return False

        q = match['quantize']
        dq = match['dequantize']

        q_scale = q.in_port(1).get_source().node
        q_zerop = q.in_port(2).get_source().node
        dq_scale = dq.in_port(1).get_source().node
        dq_zerop = dq.in_port(2).get_source().node

        inp_port = q.in_port(0).get_source()
        name = inp_port.node.soft_get('name', inp_port.node.id)

        # only constant as for zero_point/scale supported
        if any(param.soft_get('type') != 'Const' for param in (q_scale, dq_scale, q_zerop, dq_zerop)):
            return

        # only patterns with same scale/zero_point values for Q and DQ are supported
        if not (same_values(q_scale.value, dq_scale.value) and same_values(q_zerop.value, dq_zerop.value)):
            raise Error('QuantizeLinear and DequantizeLinear (after {}) have different scale or zero-point values, '
                        'cannot fuse into FakeQuantize!'.format(name))

        log.debug('Found Q-DQ pattern after {}'.format(name))

        zero_point_type = q_zerop.value.dtype
        # data type affects range of output values: [-128..127] or [0..255]
        if zero_point_type == np.int8:
            output_min_value = -128.0
            output_max_value = 127.0
        elif zero_point_type == np.uint8:
            output_min_value = 0.0
            output_max_value = 255.0
        else:
            raise Error('Not supported type {} for zero point value in node {}'.format(
                zero_point_type, q_zerop.soft_get('name')))

        # NOTE(review): multi-element scale / zero point values are passed through with
        # their original shape - confirm they broadcast along the intended axis of the data.
        min_value = q_scale.value * (output_min_value - q_zerop.value)
        max_value = q_scale.value * (output_max_value - q_zerop.value)
        input_min = Const(graph, {'value': np.array(min_value)}).create_node()
        input_max = Const(graph, {'value': np.array(max_value)}).create_node()

        FQ = FakeQuantize(graph, {
            'levels': 256,
            'name': q.name + '_Dequantize/FakeQuantize'
        }).create_node()

        # data input comes from the producer of the quantize node; input and output
        # ranges of FakeQuantize share the same limits
        FQ.in_port(0).connect(q.in_port(0).get_source())
        FQ.in_port(1).connect(input_min.out_port(0))
        FQ.in_port(2).connect(input_max.out_port(0))
        FQ.in_port(3).connect(input_min.out_port(0))
        FQ.in_port(4).connect(input_max.out_port(0))

        # consumers of the dequantize output now read from FakeQuantize, which also
        # takes over the dequantize node name
        dq.out_port(0).get_connection().set_source(FQ.out_port(0))
        dq_name = dq.soft_get('name', dq.id)
        rename_nodes([(dq, dq_name + '/to_be_removed'), (FQ, dq_name)])
Example #3
0
    def test_shape_and_value(self):
        """Check the output shape and value produced by FakeQuantize.infer for constant inputs.

        Each of the five inputs of the 'quantize' node is a float32 vector of
        four elements; the output data node starts with neither shape nor value.
        After inference the output must have shape [4] and value [0, 1, 0, 1].
        """
        input_values = (
            ('node_in_1', [5, 17, 0, 100]),
            ('node_in_2', [0, 12, 12, 12]),
            ('node_in_3', [10, 20, 20, 20]),
            ('node_in_4', [0, 0, 0, 0]),
            ('node_in_5', [1, 1, 1, 1]),
        )

        edges = [(input_name, 'quantize') for input_name, _ in input_values]
        edges += [('quantize', 'node_out_1'), ('node_out_1', 'op_output')]

        attrs_update = {'node_out_1': {'shape': None, 'value': None}}
        for input_name, values in input_values:
            attrs_update[input_name] = {
                'shape': np.array([4]),
                'value': np.array(values, dtype=np.float32),
            }

        graph = build_graph(nodes_attributes, edges, attrs_update)

        exp_node = Node(graph, 'quantize')
        FakeQuantize.infer(exp_node)

        expected_shape = [4]
        expected_value = np.array([0, 1, 0, 1], dtype=np.float32)
        res_shape = graph.node['node_out_1']['shape']
        res_value = graph.node['node_out_1']['value']

        # Fail with a clear message if inference did not produce the attributes at all.
        self.assertIsNotNone(res_shape)
        self.assertIsNotNone(res_value)

        # Compare complete sequences: a result that is longer than expected must not
        # pass just because its leading elements match.
        self.assertListEqual(expected_shape, list(res_shape))
        self.assertEqual(len(expected_value), len(res_value))
        for expected, actual in zip(expected_value, res_value):
            self.assertAlmostEqual(expected, actual, places=6)
Example #4
0
def create_fake_quantize_node(graph: Graph, name):
    """Build a FakeQuantize node whose four limit inputs are zero-valued constants.

    The node is created with 'levels' set to 0 and 'stop_value_propagation'
    enabled. Input ports 1 to 4 are each fed by a separate float32 scalar
    constant equal to 0.0; input port 0 is left unconnected. Every constant is
    run through its own ``infer`` function before the node is returned.

    :param graph: graph the nodes are added to
    :param name: name of the FakeQuantize node
    :return: the created FakeQuantize node
    """
    fq = FakeQuantize(graph, dict(name=name, levels=0, stop_value_propagation=True)).create_node()

    limit_ports = (1, 2, 3, 4)

    # Create all constants first, then wire them, then infer them - one phase at a time.
    limit_consts = [
        Const(graph, {'value': np.array(0.0).astype(np.float32)}).create_node()
        for _ in limit_ports
    ]

    for port_idx, limit_const in zip(limit_ports, limit_consts):
        limit_const.out_port(0).connect(fq.in_port(port_idx))

    for limit_const in limit_consts:
        limit_const.infer(limit_const)

    return fq
Example #5
0
    def replace_op(self, graph: Graph, node: Node):
        """Replace `node` with a two-level FakeQuantize fed by constant limits.

        The new node takes the first input of `node` as its data input. Its
        second and third inputs share one constant holding [0]; the fourth and
        fifth inputs are constants holding [-1] and [1]. All constants are
        one-element float32 arrays.

        :param graph: graph being transformed
        :param node: node to replace
        :return: single-element list with the id of the created FakeQuantize node
        """
        data_input = node.in_node(0)

        def make_const(suffix, number):
            # one-element float32 constant named after the replaced node
            return Const(graph, {
                'name': node.id + suffix,
                'value': np.array([number], dtype=np.float32),
            }).create_node()

        zero_threshold = make_const("/Input_1", 0)
        minus_one = make_const("/Input_3", -1)
        plus_one = make_const("/Input_4", +1)

        # the same threshold constant feeds both the second and the third input
        fq_inputs = [data_input, zero_threshold, zero_threshold, minus_one, plus_one]
        fq_attrs = {'name': node.id + "/FakeQuantize_", 'levels': 2}
        fake_quantize = FakeQuantize(graph, fq_attrs).create_node(inputs=fq_inputs)

        return [fake_quantize.id]
    def replace_sub_graph(self, graph: Graph, match: Dict[str, Node]):
        """Insert a sub-graph that adjusts the min/max limits of the matched node.

        The producers of input ports 1 (min) and 2 (max) are detached and routed
        through a chain of operations that:

        1. orders the pair so that min <= max;
        2. shifts the range to start at zero when min > 0, or to end at zero
           when max < 0;
        3. computes scale = (max - min) / (quant_max - quant_min), where
           quant_min = int(narrow_range) and quant_max = 2 ** num_bits - 1;
        4. computes min_adj = scale * round(min / scale) and
           max_adj = max + min_adj - min.

        min_adj feeds input ports 1 and 3 of the matched node and max_adj feeds
        ports 2 and 4; ports 3 and 4 are added if missing.

        :param graph: graph being transformed
        :param match: matched nodes, must contain the key 'op'
        """
        fq = match['op']
        prefix = fq.name

        def build(op_cls, suffix, inputs):
            # create an operation named '<prefix><suffix>' fed by `inputs`
            return op_cls(graph, {'name': prefix + suffix}).create_node(inputs)

        min_source = fq.in_port(1).get_source()
        raw_min = (min_source.node, min_source.idx)
        max_source = fq.in_port(2).get_source()
        raw_max = (max_source.node, max_source.idx)

        fq.in_port(1).disconnect()
        fq.in_port(2).disconnect()

        # make sure min < max
        is_ordered = build(Less, '/if_min_less_max', [raw_min, raw_max])
        ordered_min = build(Select, '/minimum', [is_ordered, raw_min, raw_max])
        ordered_max = build(Select, '/maximum', [is_ordered, raw_max, raw_min])

        # to create zero of limits data type, we multiply it by integer zero
        zero = create_op_node_with_second_input(graph, Mul, int64_array(0), {'name': prefix + '/zero'},
                                                input_node=ordered_min)

        # if 0 < min < max: min_adj = 0 and max_adj = max - min
        min_is_positive = build(Greater, '/if_minimum_greater_zero', [ordered_min, zero])
        positive_span = build(Sub, '/max_minus_min', [ordered_max, ordered_min])
        shifted_min = build(Select, '/first_adj_min', [min_is_positive, zero, ordered_min])
        shifted_max = build(Select, '/first_adj_max', [min_is_positive, positive_span, ordered_max])

        # if min < max < 0: min_adj = min - max and max_adj = 0
        max_is_negative = build(Less, '/if_max_less_zero', [shifted_max, zero])
        negative_span = build(Sub, '/min_minus_max', [shifted_min, shifted_max])
        range_min = build(Select, '/second_adj_min', [max_is_negative, negative_span, shifted_min])
        range_max = build(Select, '/second_adj_max', [max_is_negative, zero, shifted_max])

        # scale = (max - min) / (2 ^ num_bits - 1),
        float_range = build(Sub, '/float_range', [range_max, range_min])
        quant_min_value = int(fq.narrow_range)
        quant_max_value = 2 ** fq.num_bits - 1
        int_range = Const(graph, {
            'name': prefix + '/int_range',
            'value': quant_max_value - quant_min_value,
        }).create_node()
        scale = build(Div, '/scale', [float_range, int_range])

        # min_adj = scale * round(min / scale)
        descaled_min = build(Div, '/descaled_min', [range_min, scale])
        rounded_descaled_min = build(Round, '/rounded_descaled_min', [descaled_min])
        min_adj = build(Mul, '/min_adj', [scale, rounded_descaled_min])

        # max_adj = max + min_adj - min.
        adjustment = build(Sub, '/limits_adjustment', [min_adj, range_min])
        max_adj = build(Add, '/max_adj', [range_max, adjustment])

        # FakeQuantize operation has 5 inputs instead of 3 inputs in TensorFlow
        for extra_port in (3, 4):
            fq.add_input_port(extra_port, skip_if_exist=True)

        fq.in_port(1).connect(min_adj.out_port(0))
        fq.in_port(2).connect(max_adj.out_port(0))
        fq.in_port(3).connect(min_adj.out_port(0))
        fq.in_port(4).connect(max_adj.out_port(0))

        FakeQuantize.update_node_stat(fq, {'levels': fq['levels']})
Example #7
0
 def extract(cls, node):
     """Read the integer 'levels' attribute of `node` and store it via FakeQuantize.update_node_stat.

     :param node: node to extract attributes from
     :return: value of FakeQuantizeFrontExtractor.enabled
     """
     extracted_attrs = dict(levels=onnx_attr(node, 'levels', 'i'))
     FakeQuantize.update_node_stat(node, extracted_attrs)
     return FakeQuantizeFrontExtractor.enabled
Example #8
0
    def replace_sub_graph(self, graph: Graph, match: Dict[str, Node]):
        """Insert a zero-point nudging sub-graph in front of the matched node.

        The producers of input ports 1 (min) and 2 (max) are detached and used
        to build the following computation:

            scale               = (max - min) / (quant_max - quant_min)
            zero_point_from_min = quant_min - min / scale
            nudged_zero_point   = quant_min                   if zero_point_from_min < quant_min
                                  quant_max                   if zero_point_from_min > quant_max
                                  round(zero_point_from_min)  otherwise
            nudged_min          = (quant_min - nudged_zero_point) * scale
            nudged_max          = (quant_max - nudged_zero_point) * scale

        where quant_min = int(narrow_range) and quant_max = 2 ** num_bits - 1.
        nudged_min feeds input ports 1 and 3 of the matched node and nudged_max
        feeds ports 2 and 4; ports 3 and 4 are added if missing.

        :param graph: graph being transformed
        :param match: matched nodes, must contain the key 'op'
        """
        node = match['op']
        name = node.name

        # Zero Point Nudging : Scale counting
        # remember the original min/max producers before detaching them from the node
        f_min = node.in_port(1).get_source()
        node.in_port(1).disconnect()
        f_max = node.in_port(2).get_source()
        node.in_port(2).disconnect()

        f_diff = Sub(graph, {'name': name + '/float_range'}).create_node()
        f_max.connect(f_diff.in_port(0))
        f_min.connect(f_diff.in_port(1))

        quant_min_value = int(node.narrow_range)
        quant_max_value = 2 ** node.num_bits - 1
        i_diff = Const(graph, dict(name=name + '/int_range', value=quant_max_value - quant_min_value)).create_node()

        scale = Div(graph, {'name': name + '/scale'}).create_node()
        f_diff.out_port(0).connect(scale.in_port(0))
        i_diff.out_port(0).connect(scale.in_port(1))

        # Zero Point Nudging : ZP from min counting
        descaled_min = Div(graph, {'name': name + '/descaled_min'}).create_node()
        f_min.connect(descaled_min.in_port(0))
        scale.out_port(0).connect(descaled_min.in_port(1))

        zero_point_from_min = Sub(graph, {'name': name + '/zero_point_from_min'}).create_node()
        quant_min = Const(graph, {'value': quant_min_value, 'name': name + '/quant_min'}).create_node()
        quant_min.out_port(0).connect(zero_point_from_min.in_port(0))
        descaled_min.out_port(0).connect(zero_point_from_min.in_port(1))

        # Zero Point Nudging : Nudged Zero Point counting
        zp_lesser_q_mi = Less(graph, {'name': name + '/zero_point_from_min_less_quant_min'}).create_node()
        zero_point_from_min.out_port(0).connect(zp_lesser_q_mi.in_port(0))
        quant_min.out_port(0).connect(zp_lesser_q_mi.in_port(1))

        zp_greater_q_ma = Greater(graph, {'name': name + '/zero_point_from_min_greater_quant_max'}).create_node()
        zero_point_from_min.out_port(0).connect(zp_greater_q_ma.in_port(0))
        quant_max = Const(graph, {'value': quant_max_value, 'name': name + '/quant_max'}).create_node()
        quant_max.out_port(0).connect(zp_greater_q_ma.in_port(1))

        rounded_zero_point_from_min = Round(graph, {'name': name + '/zero_point_from_min_rounding'}).create_node()
        zero_point_from_min.out_port(0).connect(rounded_zero_point_from_min.in_port(0))

        nudged_zero_point = Select(graph, {'name': name + '/nudging_zp_1_select_less_condition'}).create_node()
        greater_condition = Select(graph, {'name': name + '/nudging_zp_2_select_greater_condition'}).create_node()

        # inner select: clamp to quant_max when above it, otherwise take the rounded value
        greater_condition.in_port(0).connect(zp_greater_q_ma.out_port(0))
        greater_condition.in_port(1).connect(quant_max.out_port(0))
        greater_condition.in_port(2).connect(rounded_zero_point_from_min.out_port(0))

        # outer select: a zero point below quant_min is clamped to quant_min (the lower
        # bound it violated). Selecting quant_max here would turn a strictly positive
        # [min, max] range into [-(max - min), 0] instead of [0, max - min].
        nudged_zero_point.in_port(0).connect(zp_lesser_q_mi.out_port(0))
        nudged_zero_point.in_port(1).connect(quant_min.out_port(0))
        nudged_zero_point.in_port(2).connect(greater_condition.out_port(0))

        nudged_i_min = Sub(graph, {'name': name + '/nudged_i_min'}).create_node()
        quant_min.out_port(0).connect(nudged_i_min.in_port(0))
        nudged_zero_point.out_port(0).connect(nudged_i_min.in_port(1))

        nudged_i_max = Sub(graph, {'name': name + '/nudged_i_max'}).create_node()
        quant_max.out_port(0).connect(nudged_i_max.in_port(0))
        nudged_zero_point.out_port(0).connect(nudged_i_max.in_port(1))

        nudged_min = Mul(graph, {'name': name + '/nudged_min'}).create_node()
        nudged_i_min.out_port(0).connect(nudged_min.in_port(0))
        scale.out_port(0).connect(nudged_min.in_port(1))

        nudged_max = Mul(graph, {'name': name + '/nudged_max'}).create_node()
        nudged_i_max.out_port(0).connect(nudged_max.in_port(0))
        scale.out_port(0).connect(nudged_max.in_port(1))

        nudged_min.out_port(0).connect(node.in_port(1))
        nudged_max.out_port(0).connect(node.in_port(2))

        # FakeQuantize operation has 5 inputs instead of 3 inputs in TensorFlow
        node.add_input_port(3, skip_if_exist=True)
        node.add_input_port(4, skip_if_exist=True)

        # output limits reuse the nudged input limits
        node.in_port(3).connect(nudged_min.out_port(0))
        node.in_port(4).connect(nudged_max.out_port(0))

        FakeQuantize.update_node_stat(node, {'levels': node['levels']})