def test_shape_only(self):
    graph = build_graph(nodes_attributes,
                        [('node_in_1', 'quantize'),
                         ('node_in_2', 'quantize'),
                         ('node_in_3', 'quantize'),
                         ('node_in_4', 'quantize'),
                         ('node_in_5', 'quantize'),
                         ('quantize', 'node_out_1'),
                         ('node_out_1', 'op_output')],
                        {'node_out_1': {'shape': None},
                         'node_in_1': {'shape': np.array([1, 3, 10, 20])},
                         'node_in_2': {'shape': np.array([1, 3, 10, 20])},
                         'node_in_3': {'shape': np.array([1, 3, 10, 20])},
                         'node_in_4': {'shape': np.array([1, 3, 10, 20])},
                         'node_in_5': {'shape': np.array([1, 3, 10, 20])},
                         })

    quantize_node = Node(graph, 'quantize')
    FakeQuantize.infer(quantize_node)
    quantize_shape = np.array([1, 3, 10, 20])
    res_shape = graph.node['node_out_1']['shape']
    for i in range(0, len(quantize_shape)):
        self.assertEqual(quantize_shape[i], res_shape[i])
def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
    q = match['quantize']
    dq = match['dequantize']

    q_scale = q.in_port(1).get_source().node
    q_zerop = q.in_port(2).get_source().node
    dq_scale = dq.in_port(1).get_source().node
    dq_zerop = dq.in_port(2).get_source().node

    inp_port = q.in_port(0).get_source()
    name = inp_port.node.soft_get('name', inp_port.node.id)

    # only Const inputs are supported for zero_point/scale
    if q_scale.soft_get('type') == 'Const' and dq_scale.soft_get('type') == 'Const' and \
            q_zerop.soft_get('type') == 'Const' and dq_zerop.soft_get('type') == 'Const':
        # only patterns with the same scale/zero_point values for Q and DQ are supported
        if q_scale.value == dq_scale.value and q_zerop.value == dq_zerop.value:
            log.debug('Found Q-DQ pattern after {}'.format(name))

            zero_point_type = q_zerop.value.dtype
            # data type affects the range of output values: [-128..127] or [0..255]
            if zero_point_type == np.int8:
                output_min_value = -128.0
                output_max_value = 127.0
            elif zero_point_type == np.uint8:
                output_min_value = 0.0
                output_max_value = 255.0
            else:
                raise Error('Not supported type {} for zero point value in node {}'.format(
                    zero_point_type, q_zerop.soft_get('name')))

            min_value = q_scale.value * (output_min_value - q_zerop.value)
            max_value = q_scale.value * (output_max_value - q_zerop.value)
            input_min = Const(graph, {'value': np.array(min_value)}).create_node()
            input_max = Const(graph, {'value': np.array(max_value)}).create_node()

            FQ = FakeQuantize(graph, {
                'levels': 256,
                'name': match['quantize'].name + '_Dequantize/FakeQuantize'
            }).create_node()

            FQ.in_port(0).connect(match['quantize'].in_port(0).get_source())
            FQ.in_port(1).connect(input_min.out_port(0))
            FQ.in_port(2).connect(input_max.out_port(0))
            FQ.in_port(3).connect(input_min.out_port(0))
            FQ.in_port(4).connect(input_max.out_port(0))

            match['dequantize'].out_port(0).get_connection().set_source(FQ.out_port(0))
            dq_name = match['dequantize'].soft_get('name', match['dequantize'].id)
            rename_nodes([(match['dequantize'], dq_name + '/to_be_removed'), (FQ, dq_name)])
        else:
            raise Error('QuantizeLinear and DequantizeLinear (after {}) have different scale or zero-point values, '
                        'cannot fuse into FakeQuantize!'.format(name))
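# A minimal standalone numpy sketch (an illustration, not part of the transform
# above) of the limit computation the fusion performs: the FakeQuantize limits
# are scale * (q_min - zero_point) and scale * (q_max - zero_point). All names
# below are local to this example.
import numpy as np

scale, zero_point = np.float32(0.5), np.float32(128)  # uint8 example values
output_min_value, output_max_value = 0.0, 255.0       # uint8 quantized range
min_value = scale * (output_min_value - zero_point)   # -> -64.0
max_value = scale * (output_max_value - zero_point)   # -> 63.5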
def test_shape_and_value(self):
    graph = build_graph(nodes_attributes,
                        [('node_in_1', 'quantize'),
                         ('node_in_2', 'quantize'),
                         ('node_in_3', 'quantize'),
                         ('node_in_4', 'quantize'),
                         ('node_in_5', 'quantize'),
                         ('quantize', 'node_out_1'),
                         ('node_out_1', 'op_output')],
                        {'node_out_1': {'shape': None, 'value': None},
                         'node_in_1': {'shape': np.array([4]),
                                       'value': np.array([5, 17, 0, 100], dtype=np.float32)},
                         'node_in_2': {'shape': np.array([4]),
                                       'value': np.array([0, 12, 12, 12], dtype=np.float32)},
                         'node_in_3': {'shape': np.array([4]),
                                       'value': np.array([10, 20, 20, 20], dtype=np.float32)},
                         'node_in_4': {'shape': np.array([4]),
                                       'value': np.array([0, 0, 0, 0], dtype=np.float32)},
                         'node_in_5': {'shape': np.array([4]),
                                       'value': np.array([1, 1, 1, 1], dtype=np.float32)},
                         })

    exp_node = Node(graph, 'quantize')
    FakeQuantize.infer(exp_node)
    quantize_shape = np.array([4])
    quantize_value = np.array([0, 1, 0, 1], dtype=np.float32)
    res_shape = graph.node['node_out_1']['shape']
    res_value = graph.node['node_out_1']['value']
    for i in range(0, len(quantize_shape)):
        self.assertEqual(quantize_shape[i], res_shape[i])
    for i in range(0, len(quantize_value)):
        self.assertAlmostEqual(quantize_value[i], res_value[i], places=6)
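# A standalone numpy sketch of the FakeQuantize reference formula this test
# exercises, assuming the 'quantize' node in nodes_attributes carries levels=2
# (consistent with the expected output). It mirrors the operation spec, not
# the FakeQuantize.infer implementation itself.
import numpy as np

def fake_quantize_ref(x, in_low, in_high, out_low, out_high, levels):
    # saturate to the input range, quantize to `levels` steps, rescale to the output range
    q = np.round((np.clip(x, in_low, in_high) - in_low) / (in_high - in_low) * (levels - 1))
    return q / (levels - 1) * (out_high - out_low) + out_low

x = np.array([5, 17, 0, 100], dtype=np.float32)
in_low = np.array([0, 12, 12, 12], dtype=np.float32)
in_high = np.array([10, 20, 20, 20], dtype=np.float32)
fake_quantize_ref(x, in_low, in_high, 0.0, 1.0, 2)  # -> [0., 1., 0., 1.]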
def create_fake_quantize_node(graph: Graph, name):
    fq = FakeQuantize(graph, {'name': name, 'levels': 0,
                              'stop_value_propagation': True}).create_node()

    # zero-initialized Const placeholders for the four FakeQuantize limits
    input_low = Const(graph, {'value': np.array(0.0).astype(np.float32)}).create_node()
    input_high = Const(graph, {'value': np.array(0.0).astype(np.float32)}).create_node()
    output_low = Const(graph, {'value': np.array(0.0).astype(np.float32)}).create_node()
    output_high = Const(graph, {'value': np.array(0.0).astype(np.float32)}).create_node()

    input_low.out_port(0).connect(fq.in_port(1))
    input_high.out_port(0).connect(fq.in_port(2))
    output_low.out_port(0).connect(fq.in_port(3))
    output_high.out_port(0).connect(fq.in_port(4))

    input_low.infer(input_low)
    input_high.infer(input_high)
    output_low.infer(output_low)
    output_high.infer(output_high)

    return fq
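# Hypothetical usage sketch for the helper above: it returns a FakeQuantize
# node whose four limit inputs (ports 1-4) are already wired to zero Consts;
# the data input (port 0) is left for the caller to connect.
#
#   fq = create_fake_quantize_node(graph, 'my_fq')       # 'my_fq' is illustrative
#   some_node.out_port(0).connect(fq.in_port(0))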
def replace_op(self, graph: Graph, node: Node):
    in_node_0 = node.in_node(0)

    broadcast = lambda x: np.array([x], dtype=np.float32)
    threshold = Const(graph, {'name': node.id + "/Input_1", "value": broadcast(0)}).create_node()
    in_1 = threshold
    in_2 = threshold
    in_3 = Const(graph, {'name': node.id + "/Input_3", "value": broadcast(-1)}).create_node()
    in_4 = Const(graph, {'name': node.id + "/Input_4", "value": broadcast(+1)}).create_node()

    quant = FakeQuantize(graph, {'name': node.id + "/FakeQuantize_",
                                 "levels": 2}).create_node(inputs=[in_node_0, in_1, in_2, in_3, in_4])

    return [quant.id]
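# Hedged numpy sketch of what the 2-level FakeQuantize built above computes
# under the FakeQuantize spec: with input_low == input_high == 0, output_low
# == -1 and output_high == +1, it acts as a sign-like binarization
# (x <= 0 -> -1, x > 0 -> +1). Illustration only.
import numpy as np

def binary_fake_quantize(x, threshold=0.0, out_low=-1.0, out_high=1.0):
    return np.where(x > threshold, out_high, out_low).astype(np.float32)

binary_fake_quantize(np.array([-3.0, 0.0, 0.5]))  # -> [-1., -1., 1.]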
def replace_sub_graph(self, graph: Graph, match: Dict[str, Node]):
    node = match['op']
    name = node.name

    min_port_tuple = (node.in_port(1).get_source().node, node.in_port(1).get_source().idx)
    max_port_tuple = (node.in_port(2).get_source().node, node.in_port(2).get_source().idx)

    node.in_port(1).disconnect()
    node.in_port(2).disconnect()

    # make sure min < max
    min_less_max = Less(graph, {'name': name + '/if_min_less_max'}).create_node(
        [min_port_tuple, max_port_tuple])
    minimum = Select(graph, {'name': name + '/minimum'}).create_node(
        [min_less_max, min_port_tuple, max_port_tuple])
    maximum = Select(graph, {'name': name + '/maximum'}).create_node(
        [min_less_max, max_port_tuple, min_port_tuple])

    # to create zero of the limits data type, we multiply a limit by integer zero
    zero = create_op_node_with_second_input(graph, Mul, int64_array(0),
                                            {'name': name + '/zero'}, input_node=minimum)

    # if 0 < min < max: min_adj = 0 and max_adj = max - min
    min_greater_zero = Greater(graph, {'name': name + '/if_minimum_greater_zero'}).create_node(
        [minimum, zero])
    max_minus_min = Sub(graph, {'name': name + '/max_minus_min'}).create_node([maximum, minimum])
    minimum = Select(graph, {'name': name + '/first_adj_min'}).create_node(
        [min_greater_zero, zero, minimum])
    maximum = Select(graph, {'name': name + '/first_adj_max'}).create_node(
        [min_greater_zero, max_minus_min, maximum])

    # if min < max < 0: min_adj = min - max and max_adj = 0
    max_less_zero = Less(graph, {'name': name + '/if_max_less_zero'}).create_node([maximum, zero])
    min_minus_max = Sub(graph, {'name': name + '/min_minus_max'}).create_node([minimum, maximum])
    minimum = Select(graph, {'name': name + '/second_adj_min'}).create_node(
        [max_less_zero, min_minus_max, minimum])
    maximum = Select(graph, {'name': name + '/second_adj_max'}).create_node(
        [max_less_zero, zero, maximum])

    # scale = (max - min) / (2 ^ num_bits - 1 - narrow_range)
    float_range = Sub(graph, {'name': name + '/float_range'}).create_node([maximum, minimum])
    quant_min_value, quant_max_value = int(node.narrow_range), 2 ** node.num_bits - 1
    int_range = Const(graph, dict(name=name + '/int_range',
                                  value=quant_max_value - quant_min_value)).create_node()
    scale = Div(graph, {'name': name + '/scale'}).create_node([float_range, int_range])

    # min_adj = scale * round(min / scale)
    descaled_min = Div(graph, {'name': name + '/descaled_min'}).create_node([minimum, scale])
    rounded_descaled_min = Round(graph, {'name': name + '/rounded_descaled_min'}).create_node(
        [descaled_min])
    min_adj = Mul(graph, {'name': name + '/min_adj'}).create_node([scale, rounded_descaled_min])

    # max_adj = max + min_adj - min
    adjustment = Sub(graph, {'name': name + '/limits_adjustment'}).create_node([min_adj, minimum])
    max_adj = Add(graph, {'name': name + '/max_adj'}).create_node([maximum, adjustment])

    # FakeQuantize operation has 5 inputs instead of 3 inputs in TensorFlow
    node.add_input_port(3, skip_if_exist=True)
    node.add_input_port(4, skip_if_exist=True)

    node.in_port(1).connect(min_adj.out_port(0))
    node.in_port(2).connect(max_adj.out_port(0))
    node.in_port(3).connect(min_adj.out_port(0))
    node.in_port(4).connect(max_adj.out_port(0))

    FakeQuantize.update_node_stat(node, {'levels': node['levels']})
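# Pure-numpy sketch of the limit-adjustment math the subgraph above encodes
# (assumed semantics of TF FakeQuantWithMinMaxVars; illustration only, assumes
# a non-degenerate min < max range).
import numpy as np

def adjust_limits(min_v, max_v, num_bits=8, narrow_range=False):
    minimum, maximum = min(min_v, max_v), max(min_v, max_v)
    if minimum > 0:                                # 0 < min < max
        minimum, maximum = 0.0, maximum - minimum
    elif maximum < 0:                              # min < max < 0
        minimum, maximum = minimum - maximum, 0.0
    int_range = (2 ** num_bits - 1) - int(narrow_range)
    scale = (maximum - minimum) / int_range
    min_adj = scale * np.round(minimum / scale)    # snap min onto the quantization grid
    max_adj = maximum + min_adj - minimum          # shift max by the same amount
    return min_adj, max_adj

adjust_limits(-0.1, 1.0)  # -> approx (-0.0992, 1.0008) for 8-bit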
@classmethod
def extract(cls, node):
    levels = onnx_attr(node, 'levels', 'i')
    FakeQuantize.update_node_stat(node, {'levels': levels})
    return FakeQuantizeFrontExtractor.enabled
def replace_sub_graph(self, graph: Graph, match: Dict[str, Node]):
    node = match['op']
    name = node.name

    # Zero point nudging: compute the scale
    f_min = node.in_port(1).get_source()
    node.in_port(1).disconnect()
    f_max = node.in_port(2).get_source()
    node.in_port(2).disconnect()

    f_diff = Sub(graph, {'name': name + '/float_range'}).create_node()
    f_max.connect(f_diff.in_port(0))
    f_min.connect(f_diff.in_port(1))

    quant_min_value = int(node.narrow_range)
    quant_max_value = 2 ** node.num_bits - 1
    i_diff = Const(graph, dict(name=name + '/int_range',
                               value=quant_max_value - quant_min_value)).create_node()

    scale = Div(graph, {'name': name + '/scale'}).create_node()
    f_diff.out_port(0).connect(scale.in_port(0))
    i_diff.out_port(0).connect(scale.in_port(1))

    # Zero point nudging: derive the zero point from min
    descaled_min = Div(graph, {'name': name + '/descaled_min'}).create_node()
    f_min.connect(descaled_min.in_port(0))
    scale.out_port(0).connect(descaled_min.in_port(1))

    zero_point_from_min = Sub(graph, {'name': name + '/zero_point_from_min'}).create_node()
    quant_min = Const(graph, {'value': quant_min_value, 'name': name + '/quant_min'}).create_node()
    quant_min.out_port(0).connect(zero_point_from_min.in_port(0))
    descaled_min.out_port(0).connect(zero_point_from_min.in_port(1))

    # Zero point nudging: clamp/round into the nudged zero point
    zp_lesser_q_mi = Less(graph, {'name': name + '/zero_point_from_min_less_quant_min'}).create_node()
    zero_point_from_min.out_port(0).connect(zp_lesser_q_mi.in_port(0))
    quant_min.out_port(0).connect(zp_lesser_q_mi.in_port(1))

    zp_greater_q_ma = Greater(graph, {'name': name + '/zero_point_from_min_greater_quant_max'}).create_node()
    zero_point_from_min.out_port(0).connect(zp_greater_q_ma.in_port(0))
    quant_max = Const(graph, {'value': quant_max_value, 'name': name + '/quant_max'}).create_node()
    quant_max.out_port(0).connect(zp_greater_q_ma.in_port(1))

    rounded_zero_point_from_min = Round(graph, {'name': name + '/zero_point_from_min_rounding'}).create_node()
    zero_point_from_min.out_port(0).connect(rounded_zero_point_from_min.in_port(0))

    nudged_zero_point = Select(graph, {'name': name + '/nudging_zp_1_select_less_condition'}).create_node()
    greater_condition = Select(graph, {'name': name + '/nudging_zp_2_select_greater_condition'}).create_node()

    greater_condition.in_port(0).connect(zp_greater_q_ma.out_port(0))
    greater_condition.in_port(1).connect(quant_max.out_port(0))
    greater_condition.in_port(2).connect(rounded_zero_point_from_min.out_port(0))

    nudged_zero_point.in_port(0).connect(zp_lesser_q_mi.out_port(0))
    # clamp to quant_min when zero_point_from_min falls below the quantized range
    nudged_zero_point.in_port(1).connect(quant_min.out_port(0))
    nudged_zero_point.in_port(2).connect(greater_condition.out_port(0))

    nudged_i_min = Sub(graph, {'name': name + '/nudged_i_min'}).create_node()
    quant_min.out_port(0).connect(nudged_i_min.in_port(0))
    nudged_zero_point.out_port(0).connect(nudged_i_min.in_port(1))

    nudged_i_max = Sub(graph, {'name': name + '/nudged_i_max'}).create_node()
    quant_max.out_port(0).connect(nudged_i_max.in_port(0))
    nudged_zero_point.out_port(0).connect(nudged_i_max.in_port(1))

    nudged_min = Mul(graph, {'name': name + '/nudged_min'}).create_node()
    nudged_i_min.out_port(0).connect(nudged_min.in_port(0))
    scale.out_port(0).connect(nudged_min.in_port(1))

    nudged_max = Mul(graph, {'name': name + '/nudged_max'}).create_node()
    nudged_i_max.out_port(0).connect(nudged_max.in_port(0))
    scale.out_port(0).connect(nudged_max.in_port(1))

    nudged_min.out_port(0).connect(node.in_port(1))
    nudged_max.out_port(0).connect(node.in_port(2))

    # FakeQuantize operation has 5 inputs instead of 3 inputs in TensorFlow
    node.add_input_port(3, skip_if_exist=True)
    node.add_input_port(4, skip_if_exist=True)
    node.in_port(3).connect(nudged_min.out_port(0))
    node.in_port(4).connect(nudged_max.out_port(0))

    FakeQuantize.update_node_stat(node, {'levels': node['levels']})
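# Numpy sketch of the zero-point nudging encoded by the subgraph above
# (mirrors the TF fake-quant nudging math as I understand it; an
# illustration, not the transform itself).
import numpy as np

def nudge(f_min, f_max, num_bits=8, narrow_range=False):
    quant_min = int(narrow_range)
    quant_max = 2 ** num_bits - 1
    scale = (f_max - f_min) / (quant_max - quant_min)
    zero_point_from_min = quant_min - f_min / scale
    if zero_point_from_min < quant_min:
        nudged_zp = quant_min
    elif zero_point_from_min > quant_max:
        nudged_zp = quant_max
    else:
        nudged_zp = np.round(zero_point_from_min)
    nudged_min = (quant_min - nudged_zp) * scale
    nudged_max = (quant_max - nudged_zp) * scale
    return nudged_min, nudged_max

nudge(-0.1, 1.0)  # both limits land exactly on the quantization grid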