Ejemplo n.º 1
0
 def testNestedNumpyArrayWithDType(self):
   """A list mixing Python floats and a 0-d np.array converts with an
   explicit dtype and round-trips through MakeNdarray unchanged."""
   proto = tensor_util.make_tensor_proto(
       [10.0, 20.0, np.array(30.0)], dtype=dtypes.float32)
   recovered = tensor_util.MakeNdarray(proto)
   self.assertEqual(np.float32, recovered.dtype)
   self.assertAllClose(
       np.array([10.0, 20.0, 30.0], dtype=np.float32), recovered)
Ejemplo n.º 2
0
    def _convert_layers_batchnorm(self, source_node):
        """Convert a TF batch-norm subgraph rooted at `source_node` into a
        single IR 'BatchNorm' node.

        Two graph shapes are handled:
          * a "transformed" form (first parent is a Mul, e.g. the ssd model),
            stored as weights 'A' and 'b' of the folded form Ax - (Au - b);
          * the regular decomposed form (Rsqrt/Mul/Sub/Add), from which
            epsilon, var, scale (gamma), bias (beta) and mean are recovered
            by walking parents/sons of the subgraph.

        NOTE(review): the parent/son index paths below encode one specific
        frozen-graph layout -- confirm against the graphs this parser targets.
        """
        IR_node = self.IR_graph.node.add()
        TensorflowParser2._copy_and_reop(source_node, IR_node, 'BatchNorm')

        is_transformed = False
        test = self.get_parent(source_node.name, [0])

        # A Mul as the first parent marks the pre-folded (transformed) form.
        if test.type == 'Mul':
            is_transformed = True

        # ssd model is transformed
        if is_transformed:
            # Folded form: Ax - (Au - b)

            # A: constant multiplier tensor taken from the Mul's second input.
            input_mul_A = self.get_parent(source_node.name, [0, 1])
            tensor_content = input_mul_A.get_attr('value')
            A_content = tensor_util.MakeNdarray(tensor_content)
            self.set_weight(source_node.name, 'A', A_content)

            # b: constant subtrahend tensor taken from the Sub parent.
            input_sub = self.get_parent(source_node.name, [1])
            tensor_content = input_sub.get_attr('value')
            sub_content = tensor_util.MakeNdarray(tensor_content)
            # print(sub_content)
            self.set_weight(source_node.name, 'b', sub_content)

            # Rewire the IR input and copy the producer's output shapes.
            input_node = self.get_parent(source_node.name, [0])
            IR_node.input.append(input_node.real_name)
            IR_node.attr["_output_shapes"].list.shape.pop()
            IR_node.attr["_output_shapes"].MergeFromString(input_node.layer.attr['_output_shapes'].SerializeToString())

        else:
            # epsilon: scalar constant read from the second parent's tensor.
            epsilon = self.get_parent(source_node.name, [1])
            IR_node.attr['epsilon'].f = epsilon.layer.attr['value'].tensor.float_val[0]

            # moving variance (var) /read
            moving_variance = self.get_parent(source_node.name, [0])

            if moving_variance.type == 'Identity':
                # Follow the '.../read' Identity back to its Const producer.
                moving_variance_read = self.src_graph.get_parent(moving_variance.name, [0])
                tensor_content = moving_variance_read.get_attr('value')
                moving_variance_content = tensor_util.MakeNdarray(tensor_content)
                self.set_weight(source_node.name, 'var', moving_variance_content)

            else:
                # Unsupported variance producer: dump the layer and abort.
                print(moving_variance.layer)
                assert False

            # gamma (scale)
            Rsqrt = self.get_son(source_node.name, [0], True)
            # print(Rsqrt.out_edges)

            # Two consumers of Rsqrt: no gamma multiply (scale=False).
            # One consumer: gamma is present (scale=True) and extracted below.
            if len(Rsqrt.out_edges) == 2:
                IR_node.attr['scale'].b = False
                output_node = self.get_son(Rsqrt.name, [0, 0], True)
                if output_node.type == 'Sub':
                    output_node = self.get_son(Rsqrt.name, [1, 0], True)
                    Mul = self.get_son(Rsqrt.name, [0], True)
                else:
                    Mul = self.get_son(Rsqrt.name, [1], True)
            else:
                IR_node.attr['scale'].b = True
                son = self.get_son(Rsqrt.name, [0, 0], True)
                gamma_from = self.get_parent(son.name, [1, 1], True)
                gamma = self.check_const(gamma_from)
                gamma_tensor = gamma.get_attr('value')
                scale = tensor_util.MakeNdarray(gamma_tensor)
                self.set_weight(source_node.name, 'scale', scale)
                output_node = self.get_son(source_node.name, [0, 0, 0, 0], True)
                if output_node.type == 'Sub':
                    output_node = self.get_son(source_node.name, [0, 0, 0, 0, 0], True)
                    Mul = self.get_son(Rsqrt.name, [0, 0], True)
                else:
                    Mul = self.get_son(Rsqrt.name, [0, 1], True)

            # beta (bias): constant reached behind the Sub/Add output node.
            beta = self.get_parent(output_node.name, [1, 0, 0], True).get_attr('value')
            bias = tensor_util.MakeNdarray(beta)
            IR_node.attr['bias'].b = True
            self.set_weight(source_node.name, 'bias', bias)

            # moving mean (mean): constant feeding the mean-side Mul.
            moving_mean = self.get_parent(Mul.name, [0, 0]).get_attr('value')
            mean = tensor_util.MakeNdarray(moving_mean)
            self.set_weight(source_node.name, 'mean', mean)

            # input node: the final Add's data input becomes the IR input;
            # its output shapes are copied, and the Add is aliased to this
            # BatchNorm so downstream consumers resolve to it.
            assert output_node.type == 'Add'
            input_node = self.get_parent(output_node.name, [0, 0])
            IR_node.input.append(input_node.real_name)
            IR_node.attr["_output_shapes"].list.shape.pop()
            IR_node.attr["_output_shapes"].MergeFromString(input_node.layer.attr['_output_shapes'].SerializeToString())
            output_node.real_name = source_node.name
Ejemplo n.º 3
0
import os

import tensorflow as tf
from tensorflow.python.framework import tensor_util

summary_dir = 'tmp/summaries'
summary_writer = tf.summary.create_file_writer('tmp/summaries')

# Log a short, monotonically increasing 'loss' series at steps 42-45.
with summary_writer.as_default():
  for step, loss_value in ((42, 0.1), (43, 0.2), (44, 0.3), (45, 0.4)):
    tf.summary.scalar('loss', loss_value, step=step)

from tensorflow.core.util import event_pb2
from tensorflow.python.lib.io import tf_record

def my_summary_iterator(path):
    """Yield an event_pb2.Event for each record in the TFRecord file *path*."""
    for raw_record in tf_record.tf_record_iterator(path):
        yield event_pb2.Event.FromString(raw_record)

# Read every event file under summary_dir and dump each logged tensor value.
for filename in os.listdir(summary_dir):
    event_path = os.path.join(summary_dir, filename)
    for event in my_summary_iterator(event_path):
        for value in event.summary.value:
            parsed = tensor_util.MakeNdarray(value.tensor)
            print(value.tag, event.step, parsed, type(parsed))
Ejemplo n.º 4
0
    # Add a leading batch dimension: single image -> shape [1, None, None, 3]
    # as the serving signature expects.
    image_np_expanded = np.expand_dims(image_np, axis=0)

    # Run the remote face-detector model and time the round trip.
    start_time = time.time()

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'face_detector'
    request.model_spec.signature_name = 'predict_output'
    request.inputs['image_tensor'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image_np_expanded,
                                          shape=list(image_np_expanded.shape)))

    result = stub.Predict(request, 10.0)  # 10-second RPC timeout

    # Convert each output TensorProto back into a numpy array.
    boxes = tensor_util.MakeNdarray(result.outputs['boxes'])
    scores = tensor_util.MakeNdarray(result.outputs['scores'])
    classes = tensor_util.MakeNdarray(result.outputs['classes'])
    num_detections = tensor_util.MakeNdarray(result.outputs['num_detections'])

    # print(boxes.shape)
    # Pick a face box from the single-image batch (index 0).
    box = find_face_bounding_box(boxes[0], scores[0])
    elapsed_time = time.time() - start_time
    print('face_detector time cost: {}'.format(elapsed_time))
    if box is not None:
        ymin, xmin, ymax, xmax = box

        # print('box found: {} {} {} {}'.format(ymin, xmin, ymax, xmax))

        # Scale coordinates by the frame size -- presumably the box values
        # are normalized to [0, 1]; frame_width/frame_height come from the
        # enclosing (unseen) scope. NOTE(review): confirm both assumptions.
        (left, right, top, bottom) = (xmin * frame_width, xmax * frame_width,
                                      ymin * frame_height, ymax * frame_height)
Ejemplo n.º 5
0
    def generate_output_graph(self, input_graph_def, input_node_map,
                              fuse_op_list):
        """Rewrite quantized ops listed in `fuse_op_list` into fused
        '<op>AndRequantize' nodes, converting their float bias to qint32.

        For each node index in `fuse_op_list`, the op together with the
        nodes at index+1..index+3 (frozen min Const, frozen max Const,
        Requantize) is collapsed into one fused node that takes over the
        Requantize node's name; all other nodes are copied unchanged.

        Args:
            input_graph_def: source GraphDef to transform.
            input_node_map: dict mapping node name -> NodeDef.
            fuse_op_list: indices into input_graph_def.node of ops to fuse.

        Returns:
            A new GraphDef containing the fused graph.

        NOTE(review): the index+1/+2/+3 layout of the frozen-min/max and
        Requantize nodes is assumed, not checked -- confirm the producing
        pass guarantees this ordering.
        """
        output_graph_def = graph_pb2.GraphDef()
        skip_list = []
        skip_node_name = []
        uint8_type = dtypes.quint8.as_datatype_enum
        qint32_type = dtypes.qint32.as_datatype_enum
        for index, node in enumerate(input_graph_def.node):

            if index in fuse_op_list:
                input_node = input_node_map[node.input[0]]
                if input_node.op == 'QuantizeV2':
                    new_node = node_def_pb2.NodeDef()

                    new_node.op = node.op + "AndRequantize"
                    for _, value in enumerate(node.input):
                        new_node.input.append(value)
                    weights_node_name = node.input[1]
                    bias_node_name = node.input[2]
                    # Input range comes from the QuantizeV2 producer
                    # (its inputs 1 and 2 are the min/max nodes).
                    min_input_node = input_node_map[
                        self.get_node_name_from_input(input_node.input[1])]
                    max_input_node = input_node_map[
                        self.get_node_name_from_input(input_node.input[2])]
                    requantize_node = input_graph_def.node[index + 3]
                    frozen_max_node = input_graph_def.node[index + 2]
                    frozen_min_node = input_graph_def.node[index + 1]
                    # Take over the Requantize node's name so downstream
                    # consumers keep resolving to this fused node.
                    new_node.name = requantize_node.name
                    min_filter_node_name = node.input[5]
                    max_filter_node_name = node.input[6]

                    new_node.input.append(frozen_min_node.name)
                    new_node.input.append(frozen_max_node.name)
                    min_filter_node = input_node_map[min_filter_node_name]
                    max_filter_node = input_node_map[max_filter_node_name]

                    new_node.attr["T1"].CopyFrom(node.attr['T1'])
                    new_node.attr["T2"].CopyFrom(node.attr['T2'])
                    min_input_value = (tensor_util.MakeNdarray(
                        min_input_node.attr['value'].tensor))
                    max_input_value = (tensor_util.MakeNdarray(
                        max_input_node.attr['value'].tensor))
                    min_filter_value = (tensor_util.MakeNdarray(
                        min_filter_node.attr['value'].tensor))
                    max_filter_value = (tensor_util.MakeNdarray(
                        max_filter_node.attr['value'].tensor))

                    weights_tensor = tensor_util.MakeNdarray(
                        input_node_map[weights_node_name].attr['value'].tensor)
                    bias_tensor = tensor_util.MakeNdarray(
                        input_node_map[bias_node_name].attr['value'].tensor)
                    # Scale mapping the float bias into the qint32 domain:
                    # 255.0 * 127.0 over the input and filter magnitudes.
                    bias_scale = 255.0 * 127.0 / (
                        max(abs(max_input_value), abs(min_input_value)) *
                        max(abs(max_filter_value), abs(min_filter_value)))
                    QaAmin = 255 * min_input_value / (max_input_value -
                                                      min_input_value)

                    int32_bias = []

                    # Fold a per-output correction (column sums of the int
                    # weights times QaAmin) into the integer bias -- this
                    # looks like an asymmetric-input zero-point correction;
                    # NOTE(review): confirm against the kernel's contract.
                    for bias_index, value in enumerate(
                            np.sum(np.array(weights_tensor, dtype=np.int32),
                                   axis=0,
                                   dtype=np.int32)):
                        int32_bias.append(
                            int(bias_tensor[bias_index] * bias_scale +
                                value * QaAmin))

                    # Reuse the bias node already emitted into the output
                    # graph when present; otherwise mutate the mapped node.
                    bias_node = self.check_node_existence(
                        output_graph_def, bias_node_name)
                    if not bias_node:
                        bias_node = input_node_map[bias_node_name]
                    bias_node.attr['dtype'].CopyFrom(
                        attr_value_pb2.AttrValue(type=qint32_type))
                    bias_node.attr['value'].CopyFrom(
                        attr_value_pb2.AttrValue(
                            tensor=tensor_util.make_tensor_proto(
                                int32_bias, dtypes.int32, bias_tensor.shape)))
                    bias_node.attr['value'].tensor.dtype = qint32_type

                    new_node.attr["Tbias"].CopyFrom(
                        attr_value_pb2.AttrValue(type=qint32_type))
                    new_node.attr["Toutput"].CopyFrom(
                        attr_value_pb2.AttrValue(type=uint8_type))

                    # The three helper nodes are consumed by the fused op.
                    skip_list.append(index + 1)
                    skip_list.append(index + 2)
                    skip_list.append(index + 3)
                    output_graph_def.node.extend(
                        [new_node, frozen_max_node, frozen_min_node])
                elif input_node.op == "Requantize":
                    new_node = node_def_pb2.NodeDef()
                    new_node.op = node.op + "AndRequantize"
                    new_node.name = input_graph_def.node[index + 3].name
                    for _, value in enumerate(node.input):
                        new_node.input.append(value)

                    weights_node_name = node.input[1]
                    bias_node_name = node.input[2]
                    # For a Requantize producer the range sits at inputs 3/4.
                    min_input_node = input_node_map[
                        self.get_node_name_from_input(input_node.input[3])]
                    max_input_node = input_node_map[
                        self.get_node_name_from_input(input_node.input[4])]
                    requantize_node = input_graph_def.node[index + 3]
                    frozen_max_node = input_graph_def.node[index + 2]
                    frozen_min_node = input_graph_def.node[index + 1]
                    skip_list.append(index + 1)
                    skip_list.append(index + 2)
                    skip_list.append(index + 3)
                    new_node.input.append(frozen_min_node.name)
                    new_node.input.append(frozen_max_node.name)
                    min_filter_node_name = node.input[5]
                    max_filter_node_name = node.input[6]
                    min_filter_node = input_node_map[min_filter_node_name]
                    max_filter_node = input_node_map[max_filter_node_name]

                    new_node.attr["T1"].CopyFrom(node.attr['T1'])
                    new_node.attr["T2"].CopyFrom(node.attr['T2'])
                    min_input_value = (tensor_util.MakeNdarray(
                        min_input_node.attr['value'].tensor))
                    max_input_value = (tensor_util.MakeNdarray(
                        max_input_node.attr['value'].tensor))
                    min_filter_value = (tensor_util.MakeNdarray(
                        min_filter_node.attr['value'].tensor))
                    max_filter_value = (tensor_util.MakeNdarray(
                        max_filter_node.attr['value'].tensor))

                    bias_tensor = tensor_util.MakeNdarray(
                        input_node_map[new_node.input[2]].attr['value'].tensor)
                    bias_scale = 255.0 * 127.0 / (
                        max(abs(max_input_value), abs(min_input_value)) *
                        max(abs(max_filter_value), abs(min_filter_value)))
                    # No weight-sum correction term in this branch.
                    bias_int32 = [int(i * bias_scale) for i in bias_tensor]
                    bias_node = self.check_node_existence(
                        output_graph_def, bias_node_name)
                    if not bias_node:
                        bias_node = input_node_map[bias_node_name]
                    bias_node.attr['dtype'].CopyFrom(
                        attr_value_pb2.AttrValue(type=qint32_type))
                    bias_node.attr['value'].CopyFrom(
                        attr_value_pb2.AttrValue(
                            tensor=tensor_util.make_tensor_proto(
                                bias_int32, dtypes.int32, bias_tensor.shape)))
                    bias_node.attr['value'].tensor.dtype = qint32_type
                    new_node.attr["Tbias"].CopyFrom(
                        attr_value_pb2.AttrValue(type=qint32_type))
                    new_node.attr["Toutput"].CopyFrom(
                        attr_value_pb2.AttrValue(type=uint8_type))

                    output_graph_def.node.extend(
                        [new_node, frozen_max_node, frozen_min_node])
                else:
                    # Producer is neither QuantizeV2 nor Requantize: copy
                    # the node through unchanged.
                    new_node = node_def_pb2.NodeDef()
                    new_node.CopyFrom(node)
                    output_graph_def.node.extend([new_node])

            elif index in skip_list or node.name in skip_node_name:
                # Helper nodes already folded into a fused op above.
                continue
            else:
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                output_graph_def.node.extend([new_node])
        return output_graph_def
Ejemplo n.º 6
0
    def Apply(self):
        """Run SSD inference through the serving stub and post-process.

        Converts the model's per-layer prediction/localisation outputs back
        to numpy arrays, then selects, clips, sorts, NMS-filters and
        rescales the boxes into self.rclasses / self.rscores / self.rbboxes.
        """
        self.internal_result = self.istub.Predict(self.internal_request, 10.0)

        outputs = self.internal_result.outputs
        # The model exposes seven feature-map layers, numbered 0..6.
        rpredictions = [
            tensor_util.MakeNdarray(outputs['predictions%d' % layer])
            for layer in range(7)
        ]
        rlocalisations = [
            tensor_util.MakeNdarray(outputs['localisations%d' % layer])
            for layer in range(7)
        ]
        rbbox_img = tensor_util.MakeNdarray(outputs['bbox_img'])

        self.rclasses, self.rscores, self.rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            SSD.ssd_anchors,
            select_threshold=SSD.thres,
            img_shape=SSD.net_shape,
            num_classes=SSD.total_classes,
            decode=True)

        # Clip to the image, keep the top 400, run NMS, then rescale.
        self.rbboxes = np_methods.bboxes_clip(rbbox_img, self.rbboxes)
        self.rclasses, self.rscores, self.rbboxes = np_methods.bboxes_sort(
            self.rclasses, self.rscores, self.rbboxes, top_k=400)
        self.rclasses, self.rscores, self.rbboxes = np_methods.bboxes_nms(
            self.rclasses,
            self.rscores,
            self.rbboxes,
            nms_threshold=SSD.nms_thres)
        self.rbboxes = np_methods.bboxes_resize(rbbox_img, self.rbboxes)
Ejemplo n.º 7
0
 def rename_Transpose(self, source_node):
     """Map a TF Transpose node to IR, extracting the constant 'perm' axes."""
     IR_node = self._convert_identity_operation(source_node, 1)
     perm_tensor = self.get_parent(source_node.name,
                                   [1]).layer.attr['value'].tensor
     perm_axes = tensor_util.MakeNdarray(perm_tensor).tolist()
     assign_IRnode_values(IR_node, {'perm': perm_axes})
Ejemplo n.º 8
0
    def _update_bias(self):
        """
        Convert the bias from float to int.

        For every op in self.fused_requantized_bias_op, walk up its input[0]
        chain (through re-ranged concats and quantized pools) to the
        quantized conv/matmul that produced its activation range, derive
        per-channel scales from that frozen output range and the filter
        min/max, and rewrite the bias Const node from float32 to qint32.
        """
        for node_name in self.node_mapping:
            current_node = self.node_mapping[node_name]
            current_node_op = current_node.op
            if current_node_op in self.fused_requantized_bias_op:
                done = False
                another_conv_node = None
                original_conv_node = current_node
                # Walk input[0] until an op in offset_map (the range source)
                # is found, or the chain is broken by an un-reranged concat
                # or any op other than the quantized pools.
                while not done:
                    current_node = self.node_mapping[
                        self.get_node_name_from_input(current_node.input[0])]
                    if current_node.op in self.offset_map:
                        another_conv_node = current_node
                        done = True
                    elif current_node.op == "QuantizedConcatV2":
                        if current_node.name not in self.rerange_concat_node:
                            done = True
                    elif current_node.op not in ("QuantizedMaxPool",
                                                 "QuantizedAvgPool"):
                        done = True

                if not another_conv_node:
                    continue

                bias_node = self.node_mapping[self.get_node_name_from_input(
                    original_conv_node.input[2])]
                bias_node_type = original_conv_node.attr['Tbias']

                # Only float32 biases still need conversion.
                # NOTE(review): the '== dtypes.qint32' clause is redundant --
                # qint32 already satisfies '!= dtypes.float32', so the whole
                # condition reduces to 'type != float32'.
                if bias_node_type.type != dtypes.float32 or bias_node_type.type == dtypes.qint32:
                    continue

                min_filter_node = self.node_mapping[
                    original_conv_node.input[5]]
                max_filter_node = self.node_mapping[
                    original_conv_node.input[6]]

                # Per-tensor min/max have an empty tensor shape (-> 1);
                # otherwise dim[0] is the per-channel count.
                channel_size = 1 if not min_filter_node.attr[
                    'value'].tensor.tensor_shape.dim else min_filter_node.attr[
                        'value'].tensor.tensor_shape.dim[0].size

                if channel_size == 1:
                    max_filter_tensor = []
                    min_filter_tensor = []
                    max_filter_tensor.append(
                        (max_filter_node.attr['value'].tensor.float_val)[0])
                    min_filter_tensor.append(
                        (min_filter_node.attr['value'].tensor.float_val)[0])
                else:
                    max_filter_tensor = tensor_util.MakeNdarray(
                        max_filter_node.attr['value'].tensor)
                    min_filter_tensor = tensor_util.MakeNdarray(
                        min_filter_node.attr['value'].tensor)

                # Frozen output range of the upstream op; offset_map gives
                # the input index where its min/max inputs start.
                offset_value = self.offset_map[another_conv_node.op]
                min_freezed_output_node = self.node_mapping[
                    another_conv_node.input[offset_value]]
                max_freezed_output_node = self.node_mapping[
                    another_conv_node.input[offset_value + 1]]
                min_input = min_freezed_output_node.attr[
                    'value'].tensor.float_val[0]
                max_input = max_freezed_output_node.attr[
                    'value'].tensor.float_val[0]

                bias_tensor = (tensor_util.MakeNdarray(
                    bias_node.attr['value'].tensor))
                bias_length = bias_tensor.shape[0]
                scales = []
                # qint8 activations span +-127, quint8 span 0..255.
                activation_range = 127.0 if current_node.attr['out_type'].type == dtypes.qint8  \
                    else 255.0
                weights_range = 127.0
                for i in range(channel_size):
                    scales.append(activation_range * weights_range /
                                  (max(abs(max_input), abs(min_input)) *
                                   max(abs(max_filter_tensor[i]),
                                       abs(min_filter_tensor[i]))))
                int32_bias = []
                # Per-channel scales when available, else the single scale.
                if channel_size > 1:
                    for i in range(bias_length):
                        int32_bias.append(int(bias_tensor[i] * scales[i]))
                else:
                    for i in range(bias_length):
                        int32_bias.append(int(bias_tensor[i] * scales[0]))

                # Advertise and store the bias as qint32 on both the op
                # attribute and the Const node itself.
                original_conv_node.attr['Tbias'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        type=dtypes.qint32.as_datatype_enum))
                bias_node.attr['dtype'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        type=dtypes.qint32.as_datatype_enum))

                bias_node.attr['value'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            int32_bias, dtypes.int32, bias_tensor.shape)))
                bias_node.attr[
                    'value'].tensor.dtype = dtypes.qint32.as_datatype_enum
Ejemplo n.º 9
0
    def Apply(self):
        """Run the ACAM action classifier on the current actor tubes.

        When input is available, builds a PredictRequest carrying the frame
        batch, temporal/spatial ROIs and their batch indices, calls the
        serving stub, and stores per-tube class probabilities in self.probs.
        Always rebuilds self.output as "<actor boxes>@<top-K action|score
        pairs>", using "None@None" when nothing was detected.
        """
        if not self.has_input:
            self.probs = []
        else:
            self.internal_request = predict_pb2.PredictRequest()
            self.internal_request.model_spec.name = 'actdet_acam'
            self.internal_request.model_spec.signature_name = 'predict_images'

            tube_num = len(self.actor_boxes)
            # Temporal ROIs all map to batch entry 0; spatial ROIs are
            # matched 1:1 with the tubes (indices 0..tube_num-1).
            nptmp1 = np.zeros(tube_num)
            nptmp2 = np.arange(tube_num)

            self.internal_request.inputs['updated_frames'].CopyFrom(
                tf.contrib.util.make_tensor_proto(self.frames,
                                                  dtype=tf.float32,
                                                  shape=self.frames.shape))

            self.internal_request.inputs['temporal_rois'].CopyFrom(
                tf.contrib.util.make_tensor_proto(
                    self.temporal_rois,
                    dtype=tf.float32,
                    shape=self.temporal_rois.shape))

            self.internal_request.inputs[
                'temporal_roi_batch_indices'].CopyFrom(
                    tf.contrib.util.make_tensor_proto(nptmp1,
                                                      dtype=tf.int32,
                                                      shape=nptmp1.shape))

            self.internal_request.inputs['rois'].CopyFrom(
                tf.contrib.util.make_tensor_proto(self.norm_rois,
                                                  dtype=tf.float32,
                                                  shape=self.norm_rois.shape))

            self.internal_request.inputs['roi_batch_indices'].CopyFrom(
                tf.contrib.util.make_tensor_proto(nptmp2,
                                                  dtype=tf.int32,
                                                  shape=nptmp2.shape))

            self.internal_result = self.istub.Predict(self.internal_request,
                                                      10.0)  # 10s timeout
            self.probs = tensor_util.MakeNdarray(
                self.internal_result.outputs['output'])

        if not len(self.probs):
            abstr = "None"
            resstr = "None"
        else:
            # Serialize actor boxes as '-'-joined "v0|v1|v2|v3|tid" groups.
            abstr = ""
            for ab in self.actor_boxes:
                abstr += "%d|%d|%d|%d|%d-" % (ab['box'][0][0], ab['box'][0][1],
                                              ab['box'][0][2], ab['box'][0][3],
                                              ab['tid'])
            abstr = abstr[:-1]  # drop trailing '-'
            resstr = ""
            # BUG FIX: 'xrange' is Python 2 only and raises NameError on
            # Python 3; 'range' has identical iteration semantics here.
            for i in range(len(self.actor_boxes)):
                act_probs = self.probs[i]
                # Action indices sorted by descending probability.
                order = np.argsort(act_probs)[::-1]
                for pp in range(PRINT_TOP_K):
                    resstr += "%s|%s|" % (str(act.ACTION_STRINGS[order[pp]]),
                                          str(act_probs[order[pp]]))
                resstr = resstr[:-1]  # drop trailing '|'
                resstr += '-'
            resstr = resstr[:-1]  # drop trailing '-'

        self.output = "%s@%s" % (abstr, resstr)
Ejemplo n.º 10
0
    def _bf16_convert(self, bf16_node_name):
        """Recursively convert node `bf16_node_name` (and eligible
        neighbours) to bfloat16, inserting Cast nodes at FP32 boundaries.

        Inputs: float32 Consts are rewritten in place to bfloat16;
        Cast-from-bf16 inputs are bypassed; ops in the WHITE/GRAY/CLEAR
        lists are converted recursively; anything else gets a shared
        FP32->BF16 Cast. Outputs are handled symmetrically with
        BF16->FP32 Casts.

        NOTE(review): termination of the mutual input/output recursion
        relies on self.converted_ops -- confirm multi-output graphs cannot
        revisit nodes (see TODOs below).
        """
        self._parse_graph()
        # Mark this node converted up front so recursive calls skip it.
        self.converted_ops.append(bf16_node_name)
        bf16_node_detail = self.node_name_mapping[bf16_node_name]
        bf16_node = bf16_node_detail.node
        bf16_node_inputs = list(bf16_node.input)
        for each_input in bf16_node_inputs:
            each_input_detail = self.node_name_mapping[each_input]
            each_input_node = each_input_detail.node
            # Const + Cast => Const optimization
            if each_input_node.op == "Const":
                if each_input_node.attr["dtype"] == attr_value_pb2.AttrValue(
                        type=dtypes.float32.as_datatype_enum):
                    # Re-encode the constant's payload as bfloat16.
                    fp32_value = tensor_util.MakeNdarray(
                        each_input_node.attr.get('value').tensor)
                    helper.set_attr_dtype(each_input_node, "dtype",
                                          dtypes.bfloat16)
                    each_input_node.attr['value'].CopyFrom(
                        attr_value_pb2.
                        AttrValue(tensor=tensor_util.make_tensor_proto(
                            fp32_value, dtypes.bfloat16, fp32_value.shape)))
                self.converted_ops.append(each_input)
            # Cast + Cast => O optimization
            elif (each_input_node.op == "Cast"
                  and each_input_node.attr["SrcT"] == attr_value_pb2.AttrValue(
                      type=dtypes.bfloat16.as_datatype_enum)):
                # Bypass a BF16->FP32 cast whose source is already bf16.
                cast_input_name = each_input_node.input[0]
                for index, input_name in enumerate(bf16_node.input):
                    if input_name == each_input_node.name:
                        bf16_node.input[index] = cast_input_name
                # Only remove the Cast if we were its sole consumer.
                if len(each_input_detail.output) == 1:
                    self.input_graph.node.remove(each_input_node)
                    del each_input_node  # drops the local name only
            elif (each_input not in self.expand_fp32_ops + self.converted_ops
                  and each_input_node.op in BF16Convert.WHITE_LIST +
                  BF16Convert.GRAY_LIST + BF16Convert.CLEAR_LIST):
                # Eligible upstream op with a single consumer: convert it too.
                if len(each_input_detail.output) == 1:
                    self._bf16_convert(each_input)
                # TODO: Consider multi-output case
            elif each_input in self.converted_ops:
                pass
            else:
                # Fallback: route this input through a (shared) FP32->BF16
                # Cast node, creating it on first use.
                if each_input + "_FP32toBF16" not in list(
                        self.node_name_mapping.keys()):
                    input_cast_node = helper.create_node(
                        "Cast", each_input + "_FP32toBF16", [each_input])
                    helper.set_attr_dtype(input_cast_node, "DstT",
                                          dtypes.bfloat16)
                    helper.set_attr_dtype(input_cast_node, "SrcT",
                                          dtypes.float32)
                    helper.set_attr_bool(input_cast_node, "Truncate", False)
                else:
                    input_cast_node = self.node_name_mapping[
                        each_input + "_FP32toBF16"].node
                for index, input_name in enumerate(bf16_node.input):
                    if input_name == each_input:
                        bf16_node.input[index] = input_cast_node.name
                self.input_graph.node.extend([input_cast_node])

        # TODO: Need consider different op type
        helper.set_attr_dtype(bf16_node, "T", dtypes.bfloat16)

        bf16_node_outputs = bf16_node_detail.output
        for each_output in bf16_node_outputs:
            each_output_detail = self.node_name_mapping[each_output]
            each_output_node = each_output_detail.node
            # Need consider output node op type

            if (each_output_node.op == "Cast" and each_output_node.attr["DstT"]
                    == attr_value_pb2.AttrValue(
                        type=dtypes.bfloat16.as_datatype_enum)):
                # The consumer is a FP32->BF16 Cast: reconnect its consumers
                # directly to this (now bf16) node.
                for cast_output in each_output_detail.output:
                    cast_output_node = self.node_name_mapping[cast_output].node
                    for index, input_name in enumerate(cast_output_node.input):
                        if each_output == input_name:
                            cast_output_node.input[index] = bf16_node.name
                # NOTE(review): 'del' only drops the local binding -- the
                # Cast node is not removed from input_graph here; confirm
                # that is intentional (a later dead-node prune?).
                del each_output_node
            elif (each_output not in self.expand_fp32_ops + self.converted_ops
                  and each_output_node.op in BF16Convert.WHITE_LIST +
                  BF16Convert.GRAY_LIST + BF16Convert.CLEAR_LIST):
                # TODO: Consider multi node inputs case, check others inputs whether converted to BF16
                self._bf16_convert(each_output)
            elif each_output in self.converted_ops:
                pass
            else:
                # Fallback: route this node's result through a (shared)
                # BF16->FP32 Cast before the unconverted consumer.
                if bf16_node_name + "_BF16toFP32" not in list(
                        self.node_name_mapping.keys()):
                    output_cast_node = helper.create_node(
                        "Cast", bf16_node_name + "_BF16toFP32",
                        [bf16_node_name])
                    helper.set_attr_dtype(output_cast_node, "DstT",
                                          dtypes.float32)
                    helper.set_attr_dtype(output_cast_node, "SrcT",
                                          dtypes.bfloat16)
                    helper.set_attr_bool(output_cast_node, "Truncate", False)
                else:
                    output_cast_node = self.node_name_mapping[
                        bf16_node_name + "_BF16toFP32"].node

                for index, input_name in enumerate(each_output_node.input):
                    if bf16_node_name == input_name:
                        each_output_node.input[index] = output_cast_node.name
                self.input_graph.node.extend([output_cast_node])
        return
Ejemplo n.º 11
0
    def getValues(self, op):
        """
        Find the underlying constants/variables representing an operation.

        Recursively walks upward from ``op`` through shape-manipulation ops
        (Identity, Squeeze, ExpandDims, Reshape, ConcatV2, Split,
        StridedSlice), applying each of them directly to the constant data,
        until reaching a Const node (decoded into a numpy array) or an op
        that needs fresh variables (delegated to ``self.opToVarArray``).

        Arguments:
            op: (tf.op) to get values of
        Returns:
            values: (np array) of scalars or variable numbers depending on op
        Raises:
            NotImplementedError: if the op type is not handled here.
        """
        input_ops = [i.op for i in op.inputs]
        ### Operations not requiring new variables ###
        if op.node_def.op == 'Identity':
            # Pass-through: just follow the single input.
            return self.getValues(input_ops[0])
        if op.node_def.op == 'Squeeze':
            print("Squeeze inputs_ops = ", input_ops)
            prevValues = self.getValues(input_ops[0])
            print("Squeeze prevValues = ", prevValues)
            print("Squeeze prevValues.shape = ", prevValues.shape)
            squeeze_dims = op.node_def.attr["squeeze_dims"].list.i
            print("Squeeze squeeze_dims = ", squeeze_dims)
            axis = op.node_def.attr["axis"].list.i
            print("Squeeze axis = ", axis)
            # "axis" and "squeeze_dims" are aliases; at most one may be set.
            assert (len(axis) == 0 or len(squeeze_dims) == 0)
            squeeze = axis if len(axis) > 0 else squeeze_dims
            # Drop every dimension listed in `squeeze`, keep the rest.
            new_shape = []
            for i, val in enumerate(prevValues.shape):
                print("i", i, "val", val)
                if i in squeeze:
                    print("removing", i, "val", val)
                    continue
                new_shape.append(val)
            print(new_shape)
            # TODO: check about "negative number for axis" (counted backward from the end)
            return prevValues.reshape(new_shape)
        if op.node_def.op == 'ExpandDims':
            dim = self.getValues(input_ops[1])
            prevValues = self.getValues(input_ops[0])
            print("ExpandDims inputs[1] (dim) = ", dim)
            print("ExpandDims inputs[0] (values) = ", prevValues)
            print("ExpandDims values shape = ", prevValues.shape)
            print("ExpandDims op.inputs[0].shape.dims = ",
                  op.inputs[0].shape.dims)
            # Insert a length-1 dimension in front of position `dim`.
            # NOTE(review): if dim == rank (append at the end) this loop
            # inserts nothing — confirm that case never occurs here.
            new_shape = []
            for i, val in enumerate(prevValues.shape):
                if i == dim:
                    new_shape.append(1)
                new_shape.append(val)
            print(new_shape)
            # TODO:need also to support - "if you specify a negative number for axis it is counted backward from the end"
            return prevValues.reshape(new_shape)
        if op.node_def.op in ['Reshape']:
            if input_ops[1].node_def.op == 'Pack':
                # Target shape comes from a Pack op: flatten everything
                # after the batch dimension into a single axis.
                prevValues = self.getValues(input_ops[0])
                input_dims = op.inputs[0].shape.dims
                input_size = np.prod(
                    np.array([d.value for d in input_dims])[1:])
                shape = (-1, input_size)
                # NOTE(review): prevValues[0] below reshapes only the first
                # slice of the array in this branch — correct only when the
                # leading (batch) dimension is 1; confirm against callers.
            else:
                prevValues = [self.getValues(i) for i in input_ops]
                shape = prevValues[1]
            return np.reshape(prevValues[0], shape)
        if op.node_def.op == 'ConcatV2':
            prevValues = [self.getValues(i) for i in input_ops]
            N = op.node_def.attr["N"].i
            values = prevValues[0:N]
            print("ConcatV2 values = ", prevValues)
            print("concat attr.N =  ", N)

            # The input after the N value tensors is the concat axis.
            axis = prevValues[N]
            return np.concatenate(values, axis=axis)
        if op.node_def.op == 'Split':
            prevValues = [self.getValues(i) for i in input_ops]
            # NOTE(review): split parameters are hard-coded (2 sections on
            # axis 1) instead of being read from the op's attributes —
            # generalize if other Split configurations appear.
            return np.split(prevValues[1], indices_or_sections=2, axis=1)
        if op.node_def.op == 'Const':
            # Base case: decode the embedded tensor proto to a numpy array.
            tproto = op.node_def.attr['value'].tensor
            return tensor_util.MakeNdarray(tproto)

        if op.node_def.op in ['StridedSlice']:

            # Inputs are (input, begin, end, strides).
            prevValues = [self.getValues(i) for i in input_ops]

            assert (len(prevValues) == 4)  ## or len(prevValues) == 3)

            input_ = prevValues[0]
            print(tf.shape(op.inputs[0]))
            print("inputs (actual name = " + input_ops[0].name + ")")
            print(input_)
            input_shape = input_.shape
            print(input_shape)

            begin = prevValues[1]
            print("begin (actual name = " + input_ops[1].name + ")")
            print(begin)
            assert (len(begin) == 3)  # Todo: support any shape
            end = prevValues[2]
            print("end (actual name = " + input_ops[2].name + ")")
            print(end)
            assert (len(end) == 3)

            strides = prevValues[3]
            print("strides (actual name = " + input_ops[3].name + ")")
            print(strides)
            assert (len(strides) == 3)

            for stride in strides:
                assert (stride == 1)  # only stride = 1 is supported

            def to_reversed_bit_array(num):
                # Bit i of a mask refers to dimension i, so reverse the
                # 3-char binary string to index it by dimension.
                return (format(num, '03b')[::-1])

            begin_mask = op.node_def.attr["begin_mask"].i
            print("begin_mask =", begin_mask)
            begin_mask_ba = to_reversed_bit_array(begin_mask)
            # Fixed debug label (previously printed "begin_mask =" again).
            print("begin_mask_ba =", begin_mask_ba)

            ellipsis_mask = op.node_def.attr["ellipsis_mask"].i
            print("ellipsis_mask =", ellipsis_mask)
            ellipsis_mask_ba = to_reversed_bit_array(ellipsis_mask)
            print("ellipsis_mask_ba =", ellipsis_mask_ba)

            end_mask = op.node_def.attr["end_mask"].i
            print("end_mask =", end_mask)
            end_mask_ba = to_reversed_bit_array(end_mask)
            print("end_mask_ba =", end_mask_ba)

            new_axis_mask = op.node_def.attr["new_axis_mask"].i
            print("new_axis_mask =", new_axis_mask)
            new_axis_mask_ba = to_reversed_bit_array(new_axis_mask)
            print("new_axis_mask_ba =", new_axis_mask_ba)

            shrink_axis_mask = op.node_def.attr["shrink_axis_mask"].i
            print("shrink_axis_mask =", shrink_axis_mask)
            shrink_axis_mask_ba = to_reversed_bit_array(shrink_axis_mask)
            print("shrink_axis_mask_ba =", shrink_axis_mask_ba)

            print()

            actual_begin = begin.copy()
            actual_end = end.copy()
            dims = len(input_shape)
            for i in range(len(begin)):
                if begin_mask_ba[i] == '1':
                    # begin_mask bit set: ignore begin[i], start at 0.
                    actual_begin[i] = 0
                if end_mask_ba[i] == '1':
                    # end_mask bit set: ignore end[i], run to the end.
                    actual_end[i] = input_shape[i]
                if shrink_axis_mask_ba[i] == '1':
                    # shrink_axis bit set: take a single index and drop the dim.
                    dims -= 1
                    if begin[i] >= 0:
                        # Fixed: the original assigned the scalars to
                        # actual_begin/actual_end themselves (clobbering the
                        # lists), then crashed indexing an int on the next line.
                        actual_begin[i] = begin[i]
                        actual_end[i] = actual_begin[i] + 1
                    else:
                        # Negative index counts backward from the end.
                        actual_begin[i] = input_shape[i] + begin[i]
                        actual_end[i] = actual_begin[i] + 1

            print("actual_begin", actual_begin)
            print("actual_end", actual_end)
            values = input_[actual_begin[0]:actual_end[0],
                            actual_begin[1]:actual_end[1],
                            actual_begin[2]:actual_end[2]]
            print(values)
            # Peel one leading axis per shrunk dimension.
            if dims == 3: return values
            if dims == 2: return values[0]
            if dims == 1: return values[0][0]
            if dims == 0: return values[0][0][0]
        ### END operations not requiring new variables ###
        if op.node_def.op in [
                'MatMul', 'BiasAdd', 'Add', 'Sub', 'Relu', 'MaxPool', 'Conv2D',
                'Placeholder', 'Mul'
        ]:
            # need to create variables for these
            return self.opToVarArray(op)

        raise NotImplementedError
Ejemplo n.º 12
def parse_tflite_graph(tflite_g, opcodes_map, model, input_prefix=''):
    """
    Returns a Graph object along with some op count stats. All tflite op types are prefixed with "TFL_".
    Names of graph inputs are optionally prefixed with a string to prevent name conflicts in subgraphs.
    Quantized tensors are surrounded with quantize/dequantize ops.

    Args:
        tflite_g: tflite flatbuffer SubGraph object to convert.
        opcodes_map: maps an operator's OpcodeIndex() to its op-type string.
        model: enclosing tflite flatbuffer Model (source of buffers and,
            for function attrs, subgraph names).
        input_prefix: optional string prepended to graph input names to
            avoid collisions when subgraphs are inlined.

    Returns:
        Tuple (onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes,
        inputs, outputs, graph_name).
    """
    op_cnt = collections.Counter()    # node count per op type
    attr_cnt = collections.Counter()  # usage count per attribute name
    onnx_nodes = []
    output_shapes = {}  # tensor name -> shape list (None if unknown)
    dtypes = {}         # tensor name -> onnx dtype enum
    tensor_names = {}   # tflite tensor index -> (possibly prefixed) name
    # Map tensor name to tflite Tensor object so we can fetch quantization info as needed
    name_to_tensor = {}
    # If a node takes a quantized tensor as input, we must add a dequantize op after it.
    # Store a mapping so we only need to make at most one dequantize op per tensor.
    tensor_name_to_dequant_output = {}

    # tflite uses generic names (arg0, arg1, etc.) for inputs but full names for other tensors, so
    # prefixing just the inputs should be fine. Other tensors are prefixed when we do inlining.
    input_indices = {
        tflite_g.Inputs(i)
        for i in range(tflite_g.InputsLength())
    }

    # First pass: register every tensor's name/shape/dtype and emit a Const
    # node for each tensor backed by buffer data.
    for i in range(tflite_g.TensorsLength()):
        tensor = tflite_g.Tensors(i)
        name = tensor.Name().decode()
        if i in input_indices:
            name = input_prefix + name
        tensor_names[i] = name
        name_to_tensor[name] = tensor

        if tensor.ShapeIsNone():
            output_shapes[name] = None
        elif tensor.ShapeSignatureIsNone():
            # The shape signature uses -1 to signify unknown dims. Old models don't have this and use Shape instead.
            output_shapes[name] = tensor.ShapeAsNumpy().tolist()
        else:
            output_shapes[name] = tensor.ShapeSignatureAsNumpy().tolist()
        buf = model.Buffers(tensor.Buffer())
        dtypes[name] = map_tflite_dtype_to_onnx(tensor.Type())
        if not buf.DataIsNone():
            # For const values we use TF to decode the binary data from the buffer
            t = tensor_pb2.TensorProto()
            t.tensor_content = buf.DataAsNumpy().tobytes()
            if output_shapes[name] is None:
                output_shapes[name] = []
            for d in output_shapes[name]:
                t.tensor_shape.dim.add().size = d
            t.dtype = map_tflite_dtype_to_tf(tensor.Type())
            np_data = tensor_util.MakeNdarray(t)
            onnx_tensor = numpy_helper.from_array(np_data, name=name)
            onnx_node = helper.make_node("Const", [],
                                         outputs=[name],
                                         name=name,
                                         value=onnx_tensor)
            onnx_nodes.append(onnx_node)
            op_cnt["Const"] += 1

    def get_dequant(tensor_name):
        """Creates a dequantize op for the provided tensor if needed and returns the output of the op, or
        the original tensor name if no dequantization is needed"""
        quant = name_to_tensor[tensor_name].Quantization()
        if quant is None or quant.ScaleIsNone() or quant.ZeroPointIsNone():
            return tensor_name
        # Reuse an already-created dequantize op for this tensor.
        if tensor_name in tensor_name_to_dequant_output:
            return tensor_name_to_dequant_output[tensor_name]
        dequant_name = tensor_name + "_dequant"
        attr = {}
        attr['scale'] = quant.ScaleAsNumpy().tolist()
        attr['zero_point'] = quant.ZeroPointAsNumpy().tolist()
        attr['quantized_dimension'] = quant.QuantizedDimension()
        onnx_node = helper.make_node("TFL_DEQUANTIZE", [tensor_name],
                                     [dequant_name],
                                     name=dequant_name,
                                     **attr)
        onnx_nodes.append(onnx_node)
        tensor_name_to_dequant_output[tensor_name] = dequant_name
        # Dequantized output keeps the shape but becomes float.
        output_shapes[dequant_name] = output_shapes[tensor_name].copy()
        dtypes[dequant_name] = onnx_pb.TensorProto.FLOAT
        return dequant_name

    def get_prequant(tensor_name):
        """Called by nodes with the name of the tensor they must output.
        If the output is supposed to be quantized, creates a Quantize op outputting the tensor.
        Returns the name that should be used for the "prequantized" tensor, or the original tensor if no quantization
        is needed"""
        quant = name_to_tensor[tensor_name].Quantization()
        if quant is None or quant.ScaleIsNone() or quant.ZeroPointIsNone():
            return tensor_name
        prequant_name = tensor_name + "_prequant"
        quantize_name = tensor_name + "_quantize"
        attr = {}
        attr['scale'] = quant.ScaleAsNumpy().tolist()
        attr['zero_point'] = quant.ZeroPointAsNumpy().tolist()
        attr['quantized_dimension'] = quant.QuantizedDimension()
        onnx_node = helper.make_node("TFL_QUANTIZE", [prequant_name],
                                     [tensor_name],
                                     name=quantize_name,
                                     **attr)
        onnx_nodes.append(onnx_node)
        # The float "prequant" tensor mirrors the quantized tensor's shape.
        output_shapes[prequant_name] = output_shapes[tensor_name].copy()
        dtypes[prequant_name] = onnx_pb.TensorProto.FLOAT
        return prequant_name

    # Second pass: convert each tflite operator to a "TFL_*" onnx node,
    # inserting dequantize/quantize wrappers where needed.
    for i in range(tflite_g.OperatorsLength()):
        op = tflite_g.Operators(i)
        optype = opcodes_map[op.OpcodeIndex()]
        op_cnt[optype] += 1
        attr = {}
        options_type_name = lookup_enum(op.BuiltinOptionsType(),
                                        'BuiltinOptions')
        option_class = get_options_class(options_type_name)
        wants_dequantized_input = True
        has_prequantized_output = True
        # QUANTIZE/DEQUANTIZE carry their own scale/zero_point attrs and
        # must not be wrapped on the side they already handle.
        if optype == 'QUANTIZE':
            out_tensor = tflite_g.Tensors(op.Outputs(0))
            quant = out_tensor.Quantization()
            has_prequantized_output = False
            if quant is not None and not quant.ScaleIsNone(
            ) and not quant.ZeroPointIsNone():
                attr['scale'] = quant.ScaleAsNumpy().tolist()
                attr['zero_point'] = quant.ZeroPointAsNumpy().tolist()
                attr['quantized_dimension'] = quant.QuantizedDimension()
        elif optype == 'DEQUANTIZE':
            in_tensor = tflite_g.Tensors(op.Inputs(0))
            quant = in_tensor.Quantization()
            wants_dequantized_input = False
            if quant is not None and not quant.ScaleIsNone(
            ) and not quant.ZeroPointIsNone():
                attr['scale'] = quant.ScaleAsNumpy().tolist()
                attr['zero_point'] = quant.ZeroPointAsNumpy().tolist()
                attr['quantized_dimension'] = quant.QuantizedDimension()
        if option_class is not None:
            options = option_class()
            options.Init(op.BuiltinOptions().Bytes, op.BuiltinOptions().Pos)
            # All flatbuffer objects have these properties.
            block_list = [
                options_type_name + 'BufferHasIdentifier', 'Init',
                'GetRootAs' + options_type_name
            ]
            # The rest of the properties of the options class provide its attribute names
            attr_names = {
                opt
                for opt in dir(options)
                if not opt.startswith('_') and opt not in block_list
            }
            for a in list(attr_names):
                # Flatbufffer list properties have 3 functions: *Length, *IsNone, and *AsNumpy
                if a + 'Length' in attr_names:
                    attr_names.remove(a + 'Length')
                    attr_names.remove(a + 'IsNone')
                    attr_names.remove(a)
            for a in attr_names:
                if a.endswith('AsNumpy'):
                    value = getattr(options, a)().tolist()
                    a = a[:-len('AsNumpy')]
                else:
                    # For enums we use a string with the value name, not enum index
                    value = getattr(options, a)()
                    if a in NODE_ATTR_NAME_TO_ENUM_TYPE:
                        value = lookup_enum(value,
                                            NODE_ATTR_NAME_TO_ENUM_TYPE[a])
                    elif a in FUNCTION_ATTRS:
                        # Function-valued attrs reference a subgraph by name.
                        value = model.Subgraphs(value).Name().decode()
                attr_cnt[a] += 1
                attr[proper_to_snake_case(a)] = value
        # -1 marks an optional tensor that is absent.
        input_names = [
            tensor_names[op.Inputs(i)] for i in range(op.InputsLength())
            if op.Inputs(i) != -1
        ]
        if wants_dequantized_input:
            input_names = [get_dequant(inp) for inp in input_names]
        output_names = [
            tensor_names[op.Outputs(i)] for i in range(op.OutputsLength())
            if op.Outputs(i) != -1
        ]
        if has_prequantized_output:
            output_names = [get_prequant(out) for out in output_names]
        onnx_node = helper.make_node("TFL_" + optype,
                                     input_names,
                                     output_names,
                                     name=output_names[0],
                                     **attr)
        onnx_nodes.append(onnx_node)

    inputs = [
        tensor_names[tflite_g.Inputs(i)]
        for i in range(tflite_g.InputsLength())
    ]
    outputs = [
        tensor_names[tflite_g.Outputs(i)]
        for i in range(tflite_g.OutputsLength())
    ]
    # TODO: Allow input/outputs to be overridden

    # Graph inputs become Placeholder nodes.
    for inp in inputs:
        onnx_node = helper.make_node("Placeholder", [],
                                     outputs=[inp],
                                     name=inp)
        onnx_nodes.append(onnx_node)

    graph_name = (tflite_g.Name() or b'tflite graph').decode()
    return onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes, inputs, outputs, graph_name
Ejemplo n.º 13
    def _intel_cpu_quantize_weight_eightbit(self,
                                            parent,
                                            input_node,
                                            per_channel,
                                            quantization_mode=b"SCALED"):
        """Quantize a float weight constant to qint8 plus min/max range nodes.

        Args:
            parent: op-type string of the node consuming the weight; must be
                "Conv2D", "MatMul" or "DepthwiseConv2dNative".
            input_node: Const NodeDef holding the float weight tensor in its
                "value" attr.
            per_channel: when True (Conv2D/MatMul only), compute a symmetric
                per-output-channel range; otherwise a single min/max pair
                via TF's quantize_v2.
            quantization_mode: mode bytes for quantize_v2 in the
                non-per-channel path (default b"SCALED").

        Returns:
            Tuple of the created node names:
            (qint8 const, min const, max const).

        Raises:
            ValueError: if ``parent`` is not a supported op type.
        """
        base_name = input_node.name + "_"
        qint8_const_name = base_name + "qint8_const"
        min_name = base_name + "min"
        max_name = base_name + "max"
        float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
        epsilon = 1e-4  # Needs to be set empirically if accuracy is not satisfactory
        if parent in ("Conv2D", "MatMul"):
            if per_channel:
                # Symmetric per-output-channel range: reduce over the first
                # three dims, leaving one range per output channel.
                ranges = np.abs(float_tensor).max(axis=(0, 1, 2))
                min_value = -ranges
                max_value = ranges
                # nudging min-max values outside epsilon radius around zero
                ranges[ranges < epsilon] = epsilon
                min_value[np.abs(min_value) < epsilon] = -epsilon
                max_value[np.abs(max_value) < epsilon] = epsilon
                qint8_tensor = (float_tensor * 127.0 / ranges).astype(np.int8)
            else:
                min_value = np.min(float_tensor.flatten())
                max_value = np.max(float_tensor.flatten())
                # Same processing of min-max as in quantize_weight_eightbit
                # function.
                if min_value > 0.0:
                    min_value = 0.0
                if min_value == max_value:
                    # Degenerate (constant) tensor: widen the range so
                    # quantize_v2 gets a non-zero span.
                    if abs(min_value) < 0.000001:
                        max_value = min_value + 1.0
                    elif min_value > 0:
                        max_value = 2 * min_value
                    else:
                        max_value = min_value / 2.0

                sess = tf.compat.v1.Session()
                with sess.as_default():
                    quantize_op = array_ops.quantize_v2(
                        float_tensor,
                        min_value,
                        max_value,
                        dtypes.qint8,
                        mode=quantization_mode,
                        round_mode="HALF_TO_EVEN")
                    qint8_tensor = quantize_op[0].numpy(
                    ) if tf.executing_eagerly() else quantize_op[0].eval()
                    # Updated min-max values should be passed to the next
                    # feeding node.
                    min_value = quantize_op[1].numpy() if tf.executing_eagerly(
                    ) else quantize_op[1].eval()
                    max_value = quantize_op[2].numpy() if tf.executing_eagerly(
                    ) else quantize_op[2].eval()
                sess.close()
        elif parent == "DepthwiseConv2dNative":
            # get the max values based on dim 0 and 1 for depthwise conv
            # since, the output channel will be dim 2 * dim 3
            ranges = np.abs(float_tensor).max(axis=(0, 1))
            ranges = ranges.flatten()
            min_value = -ranges
            max_value = ranges
            # nudging min-max values outside epsilon radius around zero
            ranges[ranges < epsilon] = epsilon
            min_value[np.abs(min_value) < epsilon] = -epsilon
            max_value[np.abs(max_value) < epsilon] = epsilon
            # Since output channel will be 1 dim which is dim 2 * dim 3
            # When divide by range, qint8_tensor needs to be 3 dim
            # where, 3rd dim should be same dim of ranges
            a, b, c, d = float_tensor.shape
            qint8_tensor = (float_tensor.reshape(a, b, c * d) * 127.0 /
                            ranges).astype(np.int8)
            # get the shape back to 4 dim
            qint8_tensor = qint8_tensor.reshape(a, b, c, d)
        else:
            # Fail fast: the original fell through and crashed later with a
            # NameError on qint8_tensor for unsupported parent ops.
            raise ValueError(
                "Unsupported parent op for weight quantization: %s" % parent)
        shape = tensor_util.TensorShapeProtoToList(
            input_node.attr["value"].tensor.tensor_shape)
        qint8_const_node = helper.create_constant_node(qint8_const_name,
                                                       qint8_tensor,
                                                       dtypes.qint8,
                                                       shape=shape)

        min_node = helper.create_constant_node(min_name,
                                               min_value,
                                               dtypes.float32,
                                               device=self.device)

        max_node = helper.create_constant_node(max_name,
                                               max_value,
                                               dtypes.float32,
                                               device=self.device)

        self.add_output_graph_node(qint8_const_node)
        self.add_output_graph_node(min_node)
        self.add_output_graph_node(max_node)

        return qint8_const_node.name, min_node.name, max_node.name
Ejemplo n.º 14
    def _convert_layers_batchnorm(self, source_node):
        """Fold TF's decomposed batch-norm subgraph into one IR BatchNorm node.

        Walks the graph around ``source_node`` to recover epsilon, the moving
        variance, optional gamma (scale), beta (bias) and the moving mean
        from surrounding Const nodes, stores them as weights on the new IR
        node, and renames the pattern's final Add to this node so downstream
        edges attach to the fused BatchNorm.
        """
        # name, op
        IR_node = self.IR_graph.node.add()
        TensorflowParser2._copy_and_reop(source_node, IR_node, 'BatchNorm')

        # epsilon: scalar float const on the second input.
        epsilon = self.get_parent(source_node.name, [1])
        IR_node.attr['epsilon'].f = epsilon.layer.attr[
            'value'].tensor.float_val[0]

        # moving variance (var) /read
        moving_variance = self.get_parent(source_node.name, [0])

        if moving_variance.type == 'Identity':
            moving_variance_read = self.src_graph.get_parent(
                moving_variance.name, [0])
            tensor_content = moving_variance_read.get_attr('value')
            moving_variance_content = tensor_util.MakeNdarray(tensor_content)
            self.set_weight(source_node.name, 'var', moving_variance_content)

        else:
            # Unexpected subgraph shape — dump the layer for debugging.
            print(moving_variance.layer)
            assert False

        # gamma (scale): the Rsqrt node's fan-out distinguishes the two
        # layouts — 2 consumers means no gamma multiplier is present.
        Rsqrt = self.get_son(source_node.name, [0], True)
        if len(Rsqrt.out_edges) == 2:
            IR_node.attr['scale'].b = False
            output_node = self.get_son(source_node.name, [0, 0, 0], True)
            Mul = self.get_son(source_node.name, [0, 1], True)
        else:
            IR_node.attr['scale'].b = True
            son = self.get_son(source_node.name, [0, 0, 0], True)
            gamma_from = self.get_parent(son.name, [1, 1], True)
            gamma = self.check_const(gamma_from)
            gamma_tensor = gamma.get_attr('value')
            scale = tensor_util.MakeNdarray(gamma_tensor)
            self.set_weight(source_node.name, 'scale', scale)
            # With gamma present the final Add sits one hop further down.
            output_node = self.get_son(source_node.name, [0, 0, 0, 0], True)
            Mul = self.get_son(source_node.name, [0, 0, 1], True)

        # beta (bias): per-channel vector, e.g. shape (96,).
        beta = self.get_parent(output_node.name, [1, 0, 0],
                               True).get_attr('value')
        bias = tensor_util.MakeNdarray(beta)
        IR_node.attr['bias'].b = True
        self.set_weight(source_node.name, 'bias', bias)

        # moving mean (mean)
        moving_mean = self.get_parent(Mul.name, [0, 0]).get_attr('value')
        mean = tensor_util.MakeNdarray(moving_mean)
        self.set_weight(source_node.name, 'mean', mean)

        # input node: the data tensor feeding the pattern's final Add.
        assert output_node.type == 'Add'
        input_node = self.get_parent(output_node.name, [0, 0])
        IR_node.input.append(input_node.real_name)

        # output node: rename the final Add so consumers connect to the
        # fused BatchNorm instead of the decomposed subgraph.
        output_node.real_name = source_node.name
Ejemplo n.º 15
def get_tf_tensor_data(tensor):
    """Decode a TensorProto into the numpy ndarray it contains."""
    make_sure(isinstance(tensor, tensor_pb2.TensorProto), "Require TensorProto")
    decoded = tensor_util.MakeNdarray(tensor)
    make_sure(isinstance(decoded, np.ndarray), "{} isn't ndarray".format(decoded))
    return decoded
Ejemplo n.º 16
def training_visualization(paths):
    """Plot training losses and inferred parameters from TensorBoard logs.

    Reads the event file ``logs/<path>`` for each entry in ``paths``,
    collects every scalar tag into a per-run dict, draws a 4x2 grid
    (losses on log scale on top, variables with shared padded y-limits
    below) and writes it to ``training_visualisation.png``.

    Args:
        paths: iterable of event-file names relative to the "logs" dir.

    Returns:
        List of per-run dicts mapping tag name -> list of scalar values,
        plus a "step" key holding the recorded step numbers.
    """
    string = "logs"
    dict_list = []

    mpl.style.use('seaborn-darkgrid')

    for path in paths:
        training_dict = {}
        for event in summary_iterator(os.path.join(string, path)):
            # Record each step once; the initial step-0 event is skipped.
            if "step" in training_dict:
                if event.step not in training_dict["step"]:
                    training_dict["step"].append(event.step)
            else:
                if event.step != 0:
                    training_dict["step"] = [event.step]

            for value in event.summary.value:
                key = value.tag
                item = tensor_util.MakeNdarray(value.tensor)

                if key in training_dict:
                    training_dict[key].append(item.item())
                else:
                    training_dict[key] = [item.item()]
        dict_list.append(training_dict)

    # plotting
    fig, axes = plt.subplots(nrows=4,
                             ncols=2,
                             sharex=True,
                             num=1,
                             figsize=(6.2, 10))

    # Top half: loss curves on a log scale, one line per run.
    for dict_t in dict_list:
        axes[0, 0].plot(dict_t["step"], dict_t["training/loss_u"], lw=2)
        axes[0, 0].set_yscale("log")
        axes[0, 0].set_title(r"$L_C$",
                             fontsize=15,
                             fontweight='bold',
                             fontname='Calibri')
        axes[0, 0].set_ylabel(r"$MSE$",
                              fontsize=15,
                              fontweight='bold',
                              fontname='Calibri')
        axes[0, 0].tick_params(axis="y", labelsize=12)

        axes[0, 1].plot(dict_t["step"], dict_t["training/loss_r"], lw=2)
        axes[0, 1].set_yscale("log")
        axes[0, 1].set_title(r"$L_r$",
                             fontsize=15,
                             fontweight='bold',
                             fontname='Calibri')
        axes[0, 1].tick_params(axis="y", labelsize=12)

        axes[1, 0].plot(dict_t["step"], dict_t["training/loss_b"], lw=2)
        axes[1, 0].set_yscale("log")
        axes[1, 0].set_title(r"$L_b$",
                             fontsize=15,
                             fontweight='bold',
                             fontname='Calibri')
        axes[1, 0].set_ylabel(r"$MSE$",
                              fontsize=15,
                              fontweight='bold',
                              fontname='Calibri')
        axes[1, 0].tick_params(axis="y", labelsize=12)

        axes[1, 1].plot(dict_t["step"], dict_t["training/loss_reg"], lw=2)
        axes[1, 1].set_yscale("log")
        axes[1, 1].set_title(r"$L_{reg}$",
                             fontweight="bold",
                             fontsize=15,
                             fontname='Calibri')
        axes[1, 1].tick_params(axis="y", labelsize=12)

    # Shared y-limits for the variable plots, taken over all runs.
    max_f = 0.0
    max_vp = 0.0
    max_ve = 0.0
    max_ps = 0.0
    for dict_t in dict_list:
        max_f = max(max_f, max(dict_t["vars/f"]))
        max_vp = max(max_vp, max(dict_t["vars/vp"]))
        max_ve = max(max_ve, max(dict_t["vars/ve"]))
        max_ps = max(max_ps, max(dict_t["vars/ps"]))
    # Pad the limits by 10% once. (Fixed: this previously ran inside the
    # loop above, compounding the padding once per run.)
    max_f += 0.1 * max_f
    max_vp += 0.1 * max_vp
    max_ve += 0.1 * max_ve
    max_ps += 0.1 * max_ps

    # Bottom half: inferred variable trajectories with the shared limits.
    for dict_t in dict_list:
        axes[2, 0].plot(dict_t["step"], dict_t["vars/f"], lw=2)
        axes[2, 0].set_ylim(ymin=0.0, ymax=max_f)
        axes[2, 0].set_title(r"$F_p$",
                             fontsize=15,
                             fontweight='bold',
                             fontname='Calibri')
        axes[2, 0].set_ylabel(r"$\mu_{value}$",
                              fontsize=15,
                              fontweight='bold',
                              fontname='Calibri')
        axes[2, 0].tick_params(axis="y", labelsize=12)

        axes[2, 1].plot(dict_t["step"], dict_t["vars/vp"], lw=2)
        axes[2, 1].set_ylim(ymin=0.0, ymax=max_vp)
        axes[2, 1].set_title(r"$v_{p}$",
                             fontsize=15,
                             fontweight='bold',
                             fontname='Calibri')
        axes[2, 1].tick_params(axis="y", labelsize=12)

        axes[3, 1].plot(dict_t["step"], dict_t["vars/ve"], lw=2)
        axes[3, 1].set_ylim(ymin=0.0, ymax=max_ve)
        axes[3, 1].set_title(r"$v_{e}$",
                             fontsize=15,
                             fontweight='bold',
                             fontname='Calibri')
        axes[3, 1].set_xlabel("epoch",
                              fontsize=15,
                              fontweight='bold',
                              fontname='Calibri')
        axes[3, 1].tick_params(axis="y", labelsize=12)

        axes[3, 0].plot(dict_t["step"], dict_t["vars/ps"], lw=2)
        axes[3, 0].set_ylim(ymin=0.0, ymax=max_ps)
        axes[3, 0].set_xlabel("epoch",
                              fontsize=15,
                              fontweight='bold',
                              fontname='Calibri')
        axes[3, 0].set_title(r"$PS$",
                             fontsize=15,
                             fontweight='bold',
                             fontname='Calibri')
        axes[3, 0].set_ylabel(r"$\mu_{value}$",
                              fontsize=15,
                              fontweight='bold',
                              fontname='Calibri')
        axes[3, 0].tick_params(axis="y", labelsize=12)

    fig.subplots_adjust(wspace=0.35, hspace=0.2)
    fig.savefig(
        "training_visualisation.png",
        dpi=300,
        format="png",
        bbox_inches="tight",
    )

    # NOTE(review): runs after savefig, so it cannot affect the saved file;
    # kept only for interactive-display parity with the original.
    for i in range(2):
        for j in range(2):
            axes[i, j].set_ylim(ymin=0.0)

    return dict_list
Ejemplo n.º 17
    def do_transformation(self):
        """Fuse each quantized op with the requantize op that follows it.

        The transformation has two stages: the first step fuses the patterns
        defined in ``self.fuse_patterns`` (plain ``Quantized* -> Requantize``
        pairs, folding the bias into int32 when the producer is itself a
        requantized op), and the last step fuses ``self.sum_pattern``
        (conv-with-sum variants that consume a Dequantize summand).

        Returns:
            [graphdef]: the optimized graphdef object
        """
        int8_type = dtypes.qint8.as_datatype_enum
        uint8_type = dtypes.quint8.as_datatype_enum
        float32_type = dtypes.float32.as_datatype_enum
        qint32_type = dtypes.qint32.as_datatype_enum

        # Stage 1: repeatedly fuse the first remaining match until the
        # analyzer finds no more instances of the pattern.
        while True:
            target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(
                self.fuse_patterns)
            if len(target_nodes) == 0:
                break

            i = target_nodes[0]

            quantized_node_name = i[0]
            quantized_node = self.graph_info[quantized_node_name].node
            requantize_node_name = i[1]
            requantize_node = self.graph_info[requantize_node_name].node
            requested_output_min_name = requantize_node.input[3]
            requested_output_max_name = requantize_node.input[4]

            quantized_node_op = i[-1][0]

            # The fused node inherits the requantize node's name so that
            # downstream consumers keep resolving their inputs.
            new_node = node_def_pb2.NodeDef()

            new_node.op = quantized_node_op + "AndRequantize"
            new_node.name = requantize_node_name
            for _, value in enumerate(quantized_node.input):
                new_node.input.append(value)

            new_node.input.append(requested_output_min_name)
            new_node.input.append(requested_output_max_name)
            if 'Tinput' in quantized_node.attr:
                new_node.attr["Tinput"].CopyFrom(quantized_node.attr['Tinput'])
            if 'Tfilter' in quantized_node.attr:
                new_node.attr["Tfilter"].CopyFrom(
                    quantized_node.attr['Tfilter'])
            if 'strides' in quantized_node.attr:
                new_node.attr["strides"].CopyFrom(
                    quantized_node.attr['strides'])
            if 'padding' in quantized_node.attr:
                new_node.attr["padding"].CopyFrom(
                    quantized_node.attr['padding'])

            parent_node_name = Helper.node_name_from_input(
                quantized_node.input[0])
            max_filter_node = self.graph_info[new_node.input[6]].node
            min_filter_node = self.graph_info[new_node.input[5]].node
            last_node = self.graph_info[new_node.input[0]].node
            if last_node.op.find('Requantize') != -1:
                # The producer is already requantized, so the input range is
                # known statically and the float bias can be pre-scaled to
                # int32 (except on GPU, which keeps a float bias).
                bias_node = self.graph_info[new_node.input[2]].node
                max_input_node = self.graph_info[last_node.input[-1]].node
                min_input_node = self.graph_info[last_node.input[-2]].node
                min_input = (min_input_node.attr['value'].tensor.float_val)[0]
                max_input = (max_input_node.attr['value'].tensor.float_val)[0]
                if 'Depthwise' in quantized_node_op or requantize_node.op.find(
                        'PerChannel') != -1:
                    channel_size = max_filter_node.attr[
                        'value'].tensor.tensor_shape.dim[0].size
                    # Bug fix: max_filter_tensor was previously built from
                    # min_filter_node's tensor, so the per-channel scales
                    # used the min range twice.
                    max_filter_tensor = tensor_util.MakeNdarray(
                        max_filter_node.attr['value'].tensor)
                    min_filter_tensor = tensor_util.MakeNdarray(
                        min_filter_node.attr['value'].tensor)
                else:
                    channel_size = 1
                    max_filter_tensor = []
                    min_filter_tensor = []
                    max_filter_tensor.append(
                        (max_filter_node.attr['value'].tensor.float_val)[0])
                    min_filter_tensor.append(
                        (min_filter_node.attr['value'].tensor.float_val)[0])
                bias_tensor = tensor_util.MakeNdarray(self.graph_info[
                    new_node.input[2]].node.attr['value'].tensor)
                bias_length = bias_tensor.shape[0]
                # scale = (quint8 range * qint8 range) /
                #         (abs input range * abs filter range), per channel.
                scales = []
                for i in range(channel_size):
                    scales.append(255.0 * 127.0 /
                                  (max(abs(max_input), abs(min_input)) *
                                   max(abs(max_filter_tensor[i]),
                                       abs(min_filter_tensor[i]))))

                int32_bias = []
                if channel_size > 1:
                    for i in range(bias_length):
                        int32_bias.append((int)(bias_tensor[i] * scales[i]))
                else:
                    for i in range(bias_length):
                        int32_bias.append((int)(bias_tensor[i] * scales[0]))

                bias_node.attr['dtype'].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type if self.
                                             device == 'gpu' else qint32_type))
                bias_node.attr['value'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            bias_tensor if self.device == 'gpu' else
                            int32_bias, dtypes.float32 if self.device ==
                            'gpu' else dtypes.int32, bias_tensor.shape)))

                bias_node.attr['value'].tensor.dtype = float32_type \
                                        if self.device == 'gpu' else qint32_type
                new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \
                                                if self.device == 'gpu' else qint32_type))
            else:
                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type))

            if "padding_list" in quantized_node.attr:
                new_node.attr["padding_list"].CopyFrom(
                    quantized_node.attr['padding_list'])
            if "dilations" in quantized_node.attr:
                new_node.attr["dilations"].CopyFrom(
                    quantized_node.attr['dilations'])

            # Convs without a fused Relu emit signed int8; everything else
            # (e.g. conv+relu) emits unsigned uint8.
            if quantized_node.op == "QuantizedConv2D" or \
                    quantized_node.op == "QuantizedConv2DWithBias":
                new_node.attr["out_type"].CopyFrom(
                    attr_value_pb2.AttrValue(type=int8_type))
            else:
                new_node.attr["out_type"].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))
            self.graph_analyzer.replace_single_node(
                new_node, [parent_node_name], quantized_node_name,
                [self.graph_info[requantize_node_name].outputs[0]],
                requantize_node_name)
            self.graph_analyzer.remove_node(quantized_node_name)

        # Stage 2: fuse the conv-with-sum pattern.
        target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(
            self.sum_pattern)
        while target_nodes:
            i = target_nodes[0]
            quantized_node_name = i[0]
            quantized_node = self.graph_info[quantized_node_name].node
            requantize_node_name = i[1]
            requantize_node = self.graph_info[requantize_node_name].node
            requested_output_min_name = requantize_node.input[3]
            requested_output_max_name = requantize_node.input[4]

            quantized_node_op = i[-1][0]

            new_node = node_def_pb2.NodeDef()

            new_node.op = quantized_node_op + "AndRequantize"
            new_node.name = requantize_node_name

            # Copy all inputs except the last one (the summand), which is
            # re-attached below as three tensors (value, min, max).
            for _, value in enumerate(quantized_node.input[:-1]):
                new_node.input.append(value)

            new_node.attr["Tinput"].CopyFrom(quantized_node.attr['Tinput'])
            new_node.attr["Tfilter"].CopyFrom(quantized_node.attr['Tfilter'])
            new_node.attr["strides"].CopyFrom(quantized_node.attr['strides'])
            new_node.attr["padding"].CopyFrom(quantized_node.attr['padding'])

            new_node.input.append(requested_output_min_name)
            new_node.input.append(requested_output_max_name)
            deq_node = self.graph_info[Helper.node_name_from_input(
                quantized_node.input[-1])].node
            # NOTE(review): 'Dequantize'.find("Quantize") != -1 is always
            # True, so this guard drops every candidate and the branches
            # below are unreachable — confirm whether 'and' was intended
            # instead of 'or'.
            if deq_node.op != 'Dequantize' or deq_node.op.find(
                    "Quantize") != -1:
                self.logger.debug(
                    'Dropping fusion due to unsupported pattern..... {}'.
                    format(i))
                target_nodes.remove(i)
                continue
            if deq_node.op == 'Dequantize':
                original_summand_node = self.graph_info[
                    Helper.node_name_from_input(deq_node.input[0])].node
            else:
                original_summand_node = deq_node
            summand_op_type = uint8_type if dtypes.as_dtype(
                deq_node.attr["T"].type) == uint8_type else int8_type

            # Wire the summand's value/min/max output ports as inputs.
            for j in range(3):
                new_node.input.append(original_summand_node.name +
                                      ':{}'.format(j))

            if "padding_list" in quantized_node.attr:
                new_node.attr["padding_list"].CopyFrom(
                    quantized_node.attr['padding_list'])

            if "dilations" in quantized_node.attr:
                new_node.attr["dilations"].CopyFrom(
                    quantized_node.attr['dilations'])
            new_node.attr["out_type"].CopyFrom(
                attr_value_pb2.AttrValue(type=uint8_type))

            new_node.attr["Tbias"].CopyFrom(
                attr_value_pb2.AttrValue(type=float32_type))

            # A signed summand needs the *Signed* fused kernel variant.
            if summand_op_type == int8_type:
                new_node.op = "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"
            new_node.attr["Tsummand"].CopyFrom(
                attr_value_pb2.AttrValue(type=summand_op_type))

            self.graph_analyzer.replace_single_node(
                new_node,
                [quantized_node.input[0], original_summand_node.name],
                quantized_node.name,
                self.graph_info[requantize_node_name].outputs,
                requantize_node_name)
            self.graph_analyzer.remove_node(quantized_node_name)

            if deq_node.op == 'Dequantize':
                self.graph_analyzer.remove_node_with_single_input_output(
                    deq_node.name)
            target_nodes.remove(i)

        return self.graph_analyzer.dump_graph()
Ejemplo n.º 18
0
def to_numpy(summary_value):
  """Convert a summary value's tensor proto into a numpy ndarray."""
  proto = summary_value.tensor
  return tensor_util.MakeNdarray(proto)
Ejemplo n.º 19
0
 def _get_value(input_node):
     """Return the node's 'value' attribute tensor as a numpy ndarray."""
     return tensor_util.MakeNdarray(input_node.attr["value"].tensor)
Ejemplo n.º 20
0
    def generate_output_graph(self, input_graph_def, input_node_map,
                              fuse_op_name):
        """Fold BatchNorm nodes into the convolutions they follow.

        For every conv listed in *fuse_op_name*, the BatchNorm's per-channel
        scales are multiplied into the conv weights and the per-channel
        offsets become a new Const feeding a BiasAdd that replaces the
        BatchNorm node (it reuses the BatchNorm's name, so consumers keep
        resolving).

        Args:
            input_graph_def: source GraphDef to rewrite.
            input_node_map: dict mapping node name -> NodeDef.
            fuse_op_name: dict mapping a conv node name -> the name of the
                BatchNorm node to fold into it.

        Returns:
            A new GraphDef with the BatchNorms folded away.
        """
        output_graph_def = graph_pb2.GraphDef()
        # NOTE(review): skip_list is consulted below but never populated in
        # this method — presumably vestigial; confirm before removing.
        skip_list = []
        skip_node_name = []

        for index, node in enumerate(input_graph_def.node):
            if node.name in fuse_op_name:
                conv_node = input_node_map[node.name]
                bn_node = input_node_map[fuse_op_name[node.name]]
                scales, offsets = self.get_scale_and_offset_values(
                    input_node_map, bn_node)
                weights_node_name = conv_node.input[1]
                weights_node = input_node_map[weights_node_name]

                # Drop the BatchNorm and all of its inputs from the output
                # graph; their effect is folded into weights/bias below.
                for bn_input in bn_node.input:
                    skip_node_name.append(bn_input)
                skip_node_name.append(bn_node.name)
                new_node = node_def_pb2.NodeDef()
                new_node.op = conv_node.op
                new_node.name = conv_node.name

                for _, value in enumerate(node.input):
                    new_node.input.append(value)
                # Number of output channels: one scale/offset applies per
                # channel; which shape dim that is depends on the conv op.
                weights_node_tensor_shape = weights_node.attr[
                    'value'].tensor.tensor_shape
                if conv_node.op == 'Conv2D':
                    weights_cols = weights_node_tensor_shape.dim[3].size
                elif conv_node.op == "DepthwiseConv2dNative":
                    weights_cols = weights_node_tensor_shape.dim[
                        2].size * weights_node_tensor_shape.dim[3].size
                else:
                    weights_cols = weights_node_tensor_shape.dim[1].size

                weights_tensor = tensor_util.MakeNdarray(
                    weights_node.attr['value'].tensor)

                # Scale each weight by its channel's scale; flat iteration
                # cycles through the channels every `weights_cols` elements.
                # (The inner `index` shadows the outer loop variable but the
                # outer enumerate re-binds it on the next iteration.)
                new_weights = []
                for index, i in enumerate(weights_tensor.flat):
                    new_weights_value = weights_tensor.flat[index] * scales[
                        index % weights_cols]
                    new_weights.append(new_weights_value)

                # One folded offset per output channel.
                new_bn = []
                for index in range(weights_cols):
                    new_bn_value = offsets[index]
                    new_bn.append(new_bn_value)

                weights_node.attr['value'].CopyFrom(
                    attr_value_pb2.
                    AttrValue(tensor=tensor_util.make_tensor_proto(
                        new_weights, dtypes.float32, weights_tensor.shape)))

                # Const node holding the folded BatchNorm offsets.
                bias_offset_node = node_def_pb2.NodeDef()
                bias_offset_node.op = "Const"
                bias_offset_node.name = conv_node.name + "_bn_offset"
                bias_offset_node.attr["dtype"].CopyFrom(
                    attr_value_pb2.AttrValue(
                        type=dtypes.float32.as_datatype_enum))
                bias_offset_node.attr["value"].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            new_bn, dtypes.float32, [weights_cols])))

                # BiasAdd replacing the BatchNorm (same name, so downstream
                # consumers are unaffected).
                biasadd_node = node_def_pb2.NodeDef()
                biasadd_node.op = "BiasAdd"
                biasadd_node.name = bn_node.name

                if "data_format" in conv_node.attr:
                    biasadd_node.attr["data_format"].CopyFrom(
                        conv_node.attr['data_format'])
                biasadd_node.attr["T"].CopyFrom(conv_node.attr['T'])
                biasadd_node.input.append(conv_node.name)
                biasadd_node.input.append(bias_offset_node.name)

                for key in conv_node.attr:
                    new_node.attr[key].CopyFrom(conv_node.attr[key])

                output_graph_def.node.extend(
                    [weights_node, bias_offset_node, biasadd_node, new_node])
            elif index in skip_list or node.name in skip_node_name:
                continue
            else:
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                output_graph_def.node.extend([new_node])

        return output_graph_def
Ejemplo n.º 21
0
def get_interactive_infer_results(model, model_in):
    """Run one inference request against a TF Serving instance over gRPC.

    Feeds *model_in* through the model's data layer, sends the resulting
    audio / audio-length / id tensors to the 'speech2text' model served at
    0.0.0.0:8500 (signature 'predict_output'), rebuilds the decoded sparse
    tensor from the response, and returns ``model.infer(inputs, outputs)``.

    Args:
        model: model object exposing ``get_data_layer()`` and ``infer()``.
        model_in: raw input accepted by the data layer's
            ``create_feed_dict``.

    Returns:
        Whatever ``model.infer`` returns for the decoded outputs.
    """
    feed_dict = model.get_data_layer().create_feed_dict(model_in)

    audio = feed_dict[model.get_data_layer().input_tensors["source_tensors"]
                      [0]]
    audio_length = feed_dict[
        model.get_data_layer().input_tensors["source_tensors"][1]]
    x_id = feed_dict[model.get_data_layer().input_tensors["source_ids"][0]]

    print('audio shape: ', audio.shape)
    print('audio_length shape: ', audio_length.shape)

    channel = grpc.insecure_channel('0.0.0.0:8500')
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'speech2text'
    request.model_spec.signature_name = 'predict_output'
    request.inputs['audio'].CopyFrom(
        tf.contrib.util.make_tensor_proto(audio, shape=list(audio.shape)))
    request.inputs['audio_length'].CopyFrom(
        tf.contrib.util.make_tensor_proto(audio_length,
                                          shape=list(audio_length.shape)))
    request.inputs['x_id'].CopyFrom(
        tf.contrib.util.make_tensor_proto(x_id, shape=list(x_id.shape)))

    result_future = stub.Predict.future(request, 5.0)  # 5 seconds
    exception = result_future.exception()
    if exception:
        print(exception)
    else:
        print('Result returned from rpc')

    inputs = model.get_data_layer().input_tensors
    # Fetch the RPC result once instead of three times; .result() re-raises
    # any RPC error, matching the original unconditional calls.
    result = result_future.result()
    indices_decoded_sequence = tensor_util.MakeNdarray(
        result.outputs['indices_decoded_sequence'])
    values_decoded_sequence = tensor_util.MakeNdarray(
        result.outputs['values_decoded_sequence'])
    dense_shape_decoded_sequence = tensor_util.MakeNdarray(
        result.outputs['dense_shape_decoded_sequence'])

    # Reassemble the decoder output as a sparse tensor value.
    outputs = [
        tf.SparseTensorValue(indices=indices_decoded_sequence,
                             values=values_decoded_sequence,
                             dense_shape=dense_shape_decoded_sequence)
    ]

    return model.infer(inputs, outputs)
    def testQuantizedTypes(self):
        """Round-trip quantized dtypes through make_tensor_proto/MakeNdarray.

        Checks the serialized tensor_content for each quantized dtype
        (byte order dependent for the multi-byte types) and that
        MakeNdarray restores the values with the matching numpy dtype.
        Uses ``assertEqual`` — the ``assertEquals`` alias is deprecated and
        removed in Python 3.12.
        """
        # Test with array.
        data = [(21, ), (22, ), (23, )]

        t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint32)
        if sys.byteorder == "big":
            self.assertProtoEquals(
                """  
        dtype: DT_QINT32  
        tensor_shape { dim { size: 3 } }  
        tensor_content: "\000\000\000\025\000\000\000\026\000\000\000\027"  
        """, t)
        else:
            self.assertProtoEquals(
                """
        dtype: DT_QINT32
        tensor_shape { dim { size: 3 } }
        tensor_content: "\025\000\000\000\026\000\000\000\027\000\000\000"
        """, t)
        a = tensor_util.MakeNdarray(t)
        self.assertEqual(dtypes.qint32.as_numpy_dtype, a.dtype)
        self.assertAllEqual(np.array(data, dtype=a.dtype), a)

        t = tensor_util.make_tensor_proto(data, dtype=dtypes.quint8)
        self.assertProtoEquals(
            """
      dtype: DT_QUINT8
      tensor_shape { dim { size: 3 } }
      tensor_content: "\025\026\027"
      """, t)
        a = tensor_util.MakeNdarray(t)
        self.assertEqual(dtypes.quint8.as_numpy_dtype, a.dtype)
        self.assertAllEqual(np.array(data, dtype=a.dtype), a)

        t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint8)
        self.assertProtoEquals(
            """
      dtype: DT_QINT8
      tensor_shape { dim { size: 3 } }
      tensor_content: "\025\026\027"
      """, t)
        a = tensor_util.MakeNdarray(t)
        self.assertEqual(dtypes.qint8.as_numpy_dtype, a.dtype)
        self.assertAllEqual(np.array(data, dtype=a.dtype), a)

        t = tensor_util.make_tensor_proto(data, dtype=dtypes.quint16)
        if sys.byteorder == "big":
            self.assertProtoEquals(
                """  
        dtype: DT_QUINT16  
        tensor_shape { dim { size: 3 } }  
        tensor_content: "\000\025\000\026\000\027"  
        """, t)
        else:
            self.assertProtoEquals(
                """
        dtype: DT_QUINT16
        tensor_shape { dim { size: 3 } }
        tensor_content: "\025\000\026\000\027\000"
        """, t)
        a = tensor_util.MakeNdarray(t)
        self.assertEqual(dtypes.quint16.as_numpy_dtype, a.dtype)
        self.assertAllEqual(np.array(data, dtype=a.dtype), a)

        t = tensor_util.make_tensor_proto(data, dtype=dtypes.qint16)
        if sys.byteorder == "big":
            self.assertProtoEquals(
                """  
        dtype: DT_QINT16  
        tensor_shape { dim { size: 3 } }  
        tensor_content: "\000\025\000\026\000\027"  
        """, t)
        else:
            self.assertProtoEquals(
                """
        dtype: DT_QINT16
        tensor_shape { dim { size: 3 } }
        tensor_content: "\025\000\026\000\027\000"
        """, t)
        a = tensor_util.MakeNdarray(t)
        self.assertEqual(dtypes.qint16.as_numpy_dtype, a.dtype)
        self.assertAllEqual(np.array(data, dtype=a.dtype), a)
Ejemplo n.º 23
0
    def generate_output_graph(self, input_graph_def, input_node_map,
                              fuse_op_name):
        """Fold a following per-channel Mul into each matched op's weights.

        For every node listed in *fuse_op_name*, the Mul's constant factor
        is multiplied into the weights tensor and the Mul node is replaced
        by the original op (which takes over the Mul's name, so downstream
        consumers are unaffected).

        Args:
            input_graph_def: source GraphDef to rewrite.
            input_node_map: dict mapping node name -> NodeDef.
            fuse_op_name: dict mapping an op's node name -> the name of the
                Mul node to fold into it.

        Returns:
            A new GraphDef with the Mul nodes folded away.
        """
        output_graph_def = graph_pb2.GraphDef()
        skip_list = []
        skip_node_name = []
        for index, node in enumerate(input_graph_def.node):

            if node.name in fuse_op_name:
                # Skip the node that immediately follows the fused op
                # (the Mul) when it comes up in this iteration.
                skip_list.append(index + 1)

                original_node = input_node_map[node.name]
                mul_node = input_node_map[fuse_op_name[node.name]]
                weights_node_name = original_node.input[1]
                weights_node = input_node_map[weights_node_name]
                mul_value_node_name = mul_node.input[1]
                mul_value_node = input_node_map[mul_value_node_name]

                # The fused node takes over the Mul node's name.
                new_node = node_def_pb2.NodeDef()
                new_node.op = original_node.op
                new_node.name = mul_node.name

                for _, value in enumerate(node.input):
                    new_node.input.append(value)

                # Number of output channels: which shape dim holds it
                # depends on the op type.
                if original_node.op == "DepthwiseConv2dNative":
                    weights_col = weights_node.attr[
                        'value'].tensor.tensor_shape.dim[
                            2].size * weights_node.attr[
                                'value'].tensor.tensor_shape.dim[3].size
                elif original_node.op == "Conv2D":
                    weights_col = weights_node.attr[
                        'value'].tensor.tensor_shape.dim[3].size
                else:
                    weights_col = weights_node.attr[
                        'value'].tensor.tensor_shape.dim[1].size
                mul_value_node_tensor = mul_value_node.attr['value'].tensor
                weights_node_tensor = weights_node.attr['value'].tensor

                # NOTE(review): an invalid Mul fusion is only logged here —
                # the code still proceeds to fold it; confirm whether this
                # should skip/raise instead.
                if len(mul_value_node_tensor.tensor_shape.dim
                       ) != 1 or mul_value_node_tensor.tensor_shape.dim[
                           0].size != weights_col:
                    self.logger.info("Invalid Mul OP fusion.")

                mul_value_node_list = [
                    i for i in tensor_util.MakeNdarray(
                        mul_value_node_tensor).flat
                ]
                # Scale each weight by its channel's multiplier; flat order
                # cycles through the channels. (Inner `index` shadows the
                # outer loop variable but is re-bound next outer iteration.)
                new_weights = []
                for index, i in enumerate(
                        tensor_util.MakeNdarray(weights_node_tensor).flat):
                    new_weights_value = i * mul_value_node_list[
                        index % len(mul_value_node_list)]
                    new_weights.append(new_weights_value)

                weights_node.attr['value'].CopyFrom(
                    attr_value_pb2.
                    AttrValue(tensor=tensor_util.make_tensor_proto(
                        new_weights, dtypes.float32,
                        tensor_util.MakeNdarray(weights_node_tensor).shape)))
                skip_node_name.append(weights_node.name)
                output_graph_def.node.extend([weights_node])
                for key in original_node.attr:
                    new_node.attr[key].CopyFrom(original_node.attr[key])

                output_graph_def.node.extend([new_node])

            elif index in skip_list or node.name in skip_node_name:
                continue
            else:
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                output_graph_def.node.extend([new_node])
        return output_graph_def
 def testStringWithImplicitRepeat(self):
     """A scalar string is broadcast across the requested shape."""
     t = tensor_util.make_tensor_proto("f", shape=[3, 4])
     a = tensor_util.MakeNdarray(t)
     # np.object was removed in NumPy 1.24; the builtin ``object`` is the
     # equivalent dtype.
     self.assertAllEqual(np.array([[b"f"] * 4] * 3, dtype=object), a)
def generate_output_graph(input_graph_def, input_node_map, output_node_map,
                          fuse_op_list, fuse_op_deq_list, device):
    """Fuse quantized ops with their trailing Requantize nodes in a GraphDef.

    Args:
        input_graph_def: source ``GraphDef`` to transform.
        input_node_map: dict mapping node name -> ``NodeDef`` of the input
            graph (used to resolve producers such as bias and min/max consts).
        output_node_map: dict mapping node name -> consumer ``NodeDef`` list;
            used to detect per-channel requantization consumers.
        fuse_op_list: indices of quantized ops to fuse; assumes the graph
            layout node, const, const, Requantize at index .. index + 3
            (TODO confirm this layout invariant holds for all callers).
        fuse_op_deq_list: indices of sum-fusion candidates whose last input
            is a summand tensor (Conv + Sum + Relu style fusion).
        device: ``'gpu'`` keeps the bias as float32 (qint32 bias is not
            supported on GPU per the TODO below); any other value converts
            the bias to qint32.

    Returns:
        A new ``GraphDef`` with fused ``*AndRequantize`` nodes; consumed
        const/Requantize nodes are skipped via ``skip_list``/``skip_node_name``.
    """
    output_graph_def = graph_pb2.GraphDef()
    # Node indices consumed by a fusion (the trailing Requantize node).
    skip_list = []
    # Node names already emitted early (e.g. a rewritten bias const).
    skip_node_name = []
    int8_type = dtypes.qint8.as_datatype_enum
    uint8_type = dtypes.quint8.as_datatype_enum
    float32_type = dtypes.float32.as_datatype_enum
    qint32_type = dtypes.qint32.as_datatype_enum
    for index, node in enumerate(input_graph_def.node):
        if index in fuse_op_list:
            # Expected layout: node, min const, max const, Requantize.
            const_node_1 = input_graph_def.node[index + 1]
            const_node_2 = input_graph_def.node[index + 2]
            requantize_node = input_graph_def.node[index + 3]
            new_node = node_def_pb2.NodeDef()

            new_node.op = node.op + "AndRequantize"
            # The fused node takes over the Requantize node's name so
            # downstream consumers keep resolving.
            new_node.name = requantize_node.name
            for _, value in enumerate(node.input):
                new_node.input.append(value)

            new_node.input.append(const_node_1.name)
            new_node.input.append(const_node_2.name)

            new_node.attr["Tinput"].CopyFrom(node.attr['Tinput'])
            new_node.attr["Tfilter"].CopyFrom(node.attr['Tfilter'])
            new_node.attr["strides"].CopyFrom(node.attr['strides'])
            new_node.attr["padding"].CopyFrom(node.attr['padding'])
            # If the producer was already requantized we can pre-scale the
            # bias into qint32 using the known input/filter ranges.
            if input_node_map[new_node.input[0]].op.find("Requantize") != -1:
                # input[2] is the bias const; input[5]/[6] are the filter
                # min/max consts. ``[:-2]`` strips the ":1"/":2" output-slot
                # suffix from the previous Requantize's min/max outputs.
                bias_node = input_node_map[new_node.input[2]]
                last_node = input_node_map[new_node.input[0]]
                max_input_node = (input_node_map[last_node.input[4][:-2]])
                min_input_node = (input_node_map[last_node.input[3][:-2]])
                max_filter = input_node_map[new_node.input[6]]
                min_filter = input_node_map[new_node.input[5]]

                min_input = (min_input_node.attr['value'].tensor.float_val)[0]
                max_input = (max_input_node.attr['value'].tensor.float_val)[0]
                if 'Depthwise' in node.op or "RequantizePerChannel" in [
                        node.op for node in output_node_map[node.name]
                ]:
                    # Per-channel quantization: one min/max pair per channel.
                    channel_size = max_filter.attr[
                        'value'].tensor.tensor_shape.dim[0].size
                    max_filter_tensor = tensor_util.MakeNdarray(
                        max_filter.attr['value'].tensor)
                    min_filter_tensor = tensor_util.MakeNdarray(
                        min_filter.attr['value'].tensor)
                else:
                    # Per-tensor quantization: a single scalar min/max pair.
                    channel_size = 1
                    max_filter_tensor = []
                    min_filter_tensor = []
                    max_filter_tensor.append(
                        (max_filter.attr['value'].tensor.float_val)[0])
                    min_filter_tensor.append(
                        (min_filter.attr['value'].tensor.float_val)[0])

                bias_tensor = tensor_util.MakeNdarray(
                    input_node_map[new_node.input[2]].attr['value'].tensor)
                bias_length = bias_tensor.shape[0]
                # Scale factor mapping float bias into the qint32 domain:
                # 255 (uint8 input range) * 127 (qint8 filter range) over the
                # product of the absolute input and filter ranges.
                scales = []
                for i in range(channel_size):
                    scales.append(255.0 * 127.0 /
                                  (max(abs(max_input), abs(min_input)) *
                                   max(abs(max_filter_tensor[i]),
                                       abs(min_filter_tensor[i]))))

                int32_bias = []
                if channel_size > 1:
                    for i in range(bias_length):
                        int32_bias.append((int)(bias_tensor[i] * scales[i]))
                else:
                    for i in range(bias_length):
                        int32_bias.append((int)(bias_tensor[i] * scales[0]))
                #(TODO) GPU not support qint32 bias tensor
                # float32 type should be removed after GPU support qint32 bias
                bias_node.attr['dtype'].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type \
                                             if device =='gpu' else qint32_type))
                bias_node.attr['value'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            bias_tensor if device == 'gpu' else int32_bias,
                            dtypes.float32 if device ==
                            'gpu' else dtypes.int32, bias_tensor.shape)))

                bias_node.attr['value'].tensor.dtype = float32_type \
                                        if device == 'gpu' else qint32_type
                # Emit the rewritten bias now and make sure the original
                # bias const is not copied again later.
                skip_node_name.append(bias_node.name)
                output_graph_def.node.extend([bias_node])
                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type \
                                             if device == 'gpu' else qint32_type))

            else:
                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type))

            if "padding_list" in node.attr:
                new_node.attr["padding_list"].CopyFrom(
                    node.attr['padding_list'])
            if "dilations" in node.attr:
                new_node.attr["dilations"].CopyFrom(node.attr['dilations'])

            # Conv without fused Relu outputs signed qint8; fused-Relu
            # variants output unsigned quint8.
            if node.op == "QuantizedConv2D" or node.op == "QuantizedConv2DWithBias":
                new_node.attr["out_type"].CopyFrom(
                    attr_value_pb2.AttrValue(type=int8_type))
            else:
                new_node.attr["out_type"].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))

            skip_list.append(index + 1)
            skip_list.append(index + 2)
            skip_list.append(index + 3)
            output_graph_def.node.extend(
                [new_node, const_node_1, const_node_2])
        elif index in skip_list or node.name in skip_node_name:
            continue
        elif node.op == "Dequantize":
            # Rewrite Dequantize to SCALED mode with an input type matching
            # what its producer actually emits.
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            new_node.attr["mode"].s = b"SCALED"
            p_node = input_node_map[new_node.input[0]]
            pp_node = input_node_map[p_node.name].input[0]
            if input_node_map[pp_node].op.find("Relu") != -1 or p_node.op in (
                    "QuantizedAvgPool", "QuantizedMaxPool",
                    "QuantizedConcatV2"):
                new_node.attr["T"].CopyFrom(
                    attr_value_pb2.AttrValue(type=uint8_type))
            elif input_node_map[pp_node].op.find(
                    "QuantizedMatMulWithBias") != -1 and p_node.op.find(
                        "Requantize") != -1:
                # MatMul + Requantize: keep the original mode and type.
                new_node.attr["mode"].s = node.attr["mode"].s
                new_node.attr["T"].CopyFrom(
                    attr_value_pb2.AttrValue(type=node.attr["T"].type))
            else:
                new_node.attr["T"].CopyFrom(
                    attr_value_pb2.AttrValue(type=int8_type))
            output_graph_def.node.extend([new_node])
        elif index in fuse_op_deq_list:
            # Sum fusion: the op's last input is the summand tensor; wire in
            # the summand producer's output plus its min/max slots (:1, :2).
            original_summand_node = input_node_map[
                input_graph_def.node[index].input[-1]]
            sum_const_node_1 = input_graph_def.node[index + 1]
            sum_const_node_2 = input_graph_def.node[index + 2]
            sum_requantize_node = input_graph_def.node[index + 3]

            new_node = node_def_pb2.NodeDef()

            new_node.op = node.op + "AndRequantize"
            new_node.name = sum_requantize_node.name
            for _, value in enumerate(node.input[:-1]):
                new_node.input.append(value)
            new_node.input.append(sum_const_node_1.name)
            new_node.input.append(sum_const_node_2.name)
            new_node.input.append(
                input_node_map[original_summand_node.name].input[0])
            new_node.input.append(
                input_node_map[original_summand_node.name].input[0] + ":1")
            new_node.input.append(
                input_node_map[original_summand_node.name].input[0] + ":2")

            # skip_list.append(index + 1)
            # skip_list.append(index + 2)
            skip_list.append(index + 3)

            new_node.attr["Tinput"].CopyFrom(node.attr['Tinput'])
            new_node.attr["Tfilter"].CopyFrom(node.attr['Tfilter'])
            new_node.attr["strides"].CopyFrom(node.attr['strides'])
            new_node.attr["padding"].CopyFrom(node.attr['padding'])
            if input_node_map[new_node.input[0]].op.find("Requantize") != -1:

                # Same bias requantization as the fuse_op_list branch above.
                bias_node = input_node_map[new_node.input[2]]
                last_node = input_node_map[new_node.input[0]]
                max_input_node = (input_node_map[last_node.input[4][:-2]])
                min_input_node = (input_node_map[last_node.input[3][:-2]])
                max_filter = input_node_map[new_node.input[6]]
                min_filter = input_node_map[new_node.input[5]]

                min_input = (min_input_node.attr['value'].tensor.float_val)[0]
                max_input = (max_input_node.attr['value'].tensor.float_val)[0]

                if "RequantizePerChannel" in [
                        node.op for node in output_node_map[node.name]
                ]:
                    channel_size = max_filter.attr[
                        'value'].tensor.tensor_shape.dim[0].size
                    max_filter_tensor = tensor_util.MakeNdarray(
                        max_filter.attr['value'].tensor)
                    min_filter_tensor = tensor_util.MakeNdarray(
                        min_filter.attr['value'].tensor)
                else:
                    channel_size = 1
                    max_filter_tensor = []
                    min_filter_tensor = []
                    max_filter_tensor.append(
                        (max_filter.attr['value'].tensor.float_val)[0])
                    min_filter_tensor.append(
                        (min_filter.attr['value'].tensor.float_val)[0])

                bias_tensor = (tensor_util.MakeNdarray(
                    input_node_map[new_node.input[2]].attr['value'].tensor))
                bias_length = bias_tensor.shape[0]
                scales = []
                for i in range(channel_size):
                    scales.append(255.0 * 127.0 /
                                  (max(abs(max_input), abs(min_input)) *
                                   max(abs(max_filter_tensor[i]),
                                       abs(min_filter_tensor[i]))))

                int32_bias = []
                if channel_size > 1:
                    for i in range(bias_length):
                        int32_bias.append(int(bias_tensor[i] * scales[i]))
                else:
                    for i in range(bias_length):
                        int32_bias.append(int(bias_tensor[i] * scales[0]))

                #(TODO) GPU not support qint32 bias tensor
                # float32 type should be removed after GPU support qint32 bias
                bias_node.attr['dtype'].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type \
                                             if device =='gpu' else qint32_type))
                bias_node.attr['value'].CopyFrom(
                    attr_value_pb2.AttrValue(
                        tensor=tensor_util.make_tensor_proto(
                            bias_tensor if device == 'gpu' else int32_bias,
                            dtypes.float32 if device ==
                            'gpu' else dtypes.int32, bias_tensor.shape)))

                bias_node.attr['value'].tensor.dtype = float32_type \
                                        if device == 'gpu' else qint32_type
                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type \
                                             if device == 'gpu' else qint32_type))

                skip_node_name.append(bias_node.name)
                output_graph_def.node.extend([bias_node])

            else:
                new_node.attr["Tbias"].CopyFrom(
                    attr_value_pb2.AttrValue(type=float32_type))

            if "padding_list" in node.attr:
                new_node.attr["padding_list"].CopyFrom(
                    node.attr['padding_list'])
            if "dilations" in node.attr:
                new_node.attr["dilations"].CopyFrom(node.attr['dilations'])

            new_node.attr["out_type"].CopyFrom(
                attr_value_pb2.AttrValue(type=uint8_type))

            summand_op_type = uint8_type if dtypes.as_dtype(
                original_summand_node.attr["T"].type
            ) == uint8_type else int8_type

            # Signed summand needs the signed-sum fused kernel variant.
            if summand_op_type == int8_type:
                new_node.op = "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"

            new_node.attr["Tsummand"].CopyFrom(
                attr_value_pb2.AttrValue(type=summand_op_type))
            output_graph_def.node.extend([new_node])
        else:
            # Pass through every node not involved in a fusion unchanged.
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            output_graph_def.node.extend([new_node])
    return output_graph_def
    def testSingleTensorFullTensorDebugModeWithCircularBufferBehavior(self):
        """FULL_TENSOR debug traces honor the writer's circular buffer.

        Runs more iterations than the buffer holds and verifies that, after
        flushing, only the traces of the final iteration remain in
        ``self.dump_root``.
        """
        @def_function.function
        def write_debug_trace(x):
            # DebugIdentityV2 is a stateful op. It ought to be included by auto
            # control dependency.
            square = math_ops.square(x)
            gen_debug_ops.debug_identity_v2(
                square,
                tfdbg_context_id="deadbeaf",
                op_name="Square",
                output_slot=0,
                tensor_debug_mode=debug_event_pb2.TensorDebugMode.FULL_TENSOR,
                debug_urls=["file://%s" % self.dump_root])

            sqrt = math_ops.sqrt(x)
            gen_debug_ops.debug_identity_v2(
                sqrt,
                tfdbg_context_id="beafdead",
                op_name="Sqrt",
                output_slot=0,
                tensor_debug_mode=debug_event_pb2.TensorDebugMode.FULL_TENSOR,
                debug_urls=["file://%s" % self.dump_root])
            return square + sqrt

        x = np.array([3.0, 4.0])
        # Only the graph-execution trace of the last iteration should be written
        # to self.dump_root.
        for _ in range(self.circular_buffer_size // 2 + 1):
            self.assertAllClose(write_debug_trace(x),
                                [9.0 + np.sqrt(3.0), 16.0 + 2.0])

        with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
            metadata_iter = reader.metadata_iterator()
            # Check that the .metadata DebugEvents data file has been created, even
            # before FlushExecutionFiles() is called.
            debug_event = next(metadata_iter).debug_event
            self.assertGreater(debug_event.wall_time, 0)
            self.assertTrue(debug_event.debug_metadata.tensorflow_version)
            self.assertTrue(
                debug_event.debug_metadata.file_version.startswith(
                    "debug.Event:"))

            graph_trace_iter = reader.graph_execution_traces_iterator()
            # Before FlushExecutionFiles() is called, the .graph_execution_traces file
            # ought to be empty.
            with self.assertRaises(StopIteration):
                next(graph_trace_iter)

            # Flush the circular buffer.
            self.writer.FlushExecutionFiles()
            graph_trace_iter = reader.graph_execution_traces_iterator()

            # The circular buffer has a size of 4. So only the data from the
            # last two iterations should have been written to self.dump_root.
            for _ in range(2):
                # Each iteration wrote Square first, then Sqrt, so the traces
                # appear in that order in the file.
                debug_event = next(graph_trace_iter).debug_event
                self.assertGreater(debug_event.wall_time, 0)
                trace = debug_event.graph_execution_trace
                self.assertEqual(trace.tfdbg_context_id, "deadbeaf")
                self.assertEqual(trace.op_name, "Square")
                self.assertEqual(trace.output_slot, 0)
                self.assertEqual(trace.tensor_debug_mode,
                                 debug_event_pb2.TensorDebugMode.FULL_TENSOR)
                tensor_value = tensor_util.MakeNdarray(trace.tensor_proto)
                self.assertAllClose(tensor_value, [9.0, 16.0])

                debug_event = next(graph_trace_iter).debug_event
                self.assertGreater(debug_event.wall_time, 0)
                trace = debug_event.graph_execution_trace
                self.assertEqual(trace.tfdbg_context_id, "beafdead")
                self.assertEqual(trace.op_name, "Sqrt")
                self.assertEqual(trace.output_slot, 0)
                self.assertEqual(trace.tensor_debug_mode,
                                 debug_event_pb2.TensorDebugMode.FULL_TENSOR)
                tensor_value = tensor_util.MakeNdarray(trace.tensor_proto)
                self.assertAllClose(tensor_value, [np.sqrt(3.0), 2.0])

            # Only the graph-execution trace of the last iteration should be written
            # to self.dump_root.
            with self.assertRaises(StopIteration):
                next(graph_trace_iter)
Ejemplo n.º 27
0
        prnet_image_cropper.PreProcess(prnet_request, stub)
        prnet_image_cropper.Apply()
        next_request = prnet_image_cropper.PostProcess()
        elapsed_time = time.time() - start_time
        print('prnet_image_cropper time cost: {}'.format(elapsed_time))

        start_time = time.time()
        prn = PRNet()
        prn.PreProcess(next_request, stub)
        prn.Apply()
        final_request = prn.PostProcess()
        elapsed_time = time.time() - start_time
        print('prnet time cost: {}'.format(elapsed_time))

        start_time = time.time()
        kpt = tensor_util.MakeNdarray(final_request.inputs["prnet_output"])
        vertices = tensor_util.MakeNdarray(final_request.inputs["vertices"])
        print(vertices.shape)

        q.put(vertices)

        #        show_img = plot_vertices(np.zeros_like(image), vertices)

        #        show_img = image
        elapsed_time = time.time() - start_time
        print('plot vertices time cost: {}'.format(elapsed_time))
    else:
        q.put(None)
        # Display the resulting frame
#    cv2.imshow('frame',show_img)
Ejemplo n.º 28
0
def parse_tflite_graph(tflite_g,
                       opcodes_map,
                       model,
                       input_prefix='',
                       tensor_shapes_override=None):
    """
    Returns onnx-style node lists along with some op count stats. All tflite op
    types are prefixed with "TFL_". Names of graph inputs are optionally
    prefixed with a string to prevent name conflicts in subgraphs. Quantized
    tensors are surrounded with quantize/dequantize ops.

    Args:
        tflite_g: the tflite SubGraph flatbuffer object to parse.
        opcodes_map: maps opcode index -> tflite op type name.
        model: the parent tflite Model flatbuffer (for buffers/subgraph names).
        input_prefix: string prepended to graph input tensor names.
        tensor_shapes_override: optional dict of tensor name -> shape that
            takes precedence over the shapes stored in the model.

    Returns:
        Tuple of (onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes,
        inputs, outputs, graph_name).
    """
    op_cnt = collections.Counter()
    attr_cnt = collections.Counter()
    onnx_nodes = []
    output_shapes = {}
    dtypes = {}
    tensor_names = {}
    if tensor_shapes_override is None:
        tensor_shapes_override = {}
    # Map tensor name to tflite Tensor object so we can fetch quantization info as needed
    name_to_tensor = {}
    # If a node takes a quantized tensor as input, we must add a dequantize op after it.
    # Store a mapping so we only need to make at most one dequantize op per tensor.
    tensor_name_to_dequant_output = {}

    # tflite uses generic names (arg0, arg1, etc.) for inputs but full names for other tensors, so
    # prefixing just the inputs should be fine. Other tensors are prefixed when we do inlining.
    input_indices = {
        tflite_g.Inputs(i)
        for i in range(tflite_g.InputsLength())
    }

    # First pass: register every tensor's name, shape and dtype, and emit a
    # Const node for each tensor backed by non-empty buffer data.
    for i in range(tflite_g.TensorsLength()):
        tensor = tflite_g.Tensors(i)
        name = tensor.Name().decode()
        if i in input_indices:
            name = input_prefix + name
        tensor_names[i] = name
        name_to_tensor[name] = tensor

        if name in tensor_shapes_override:
            output_shapes[name] = tensor_shapes_override[name]
        elif tensor.ShapeIsNone():
            output_shapes[name] = None
        elif tensor.ShapeSignatureIsNone():
            # The shape signature uses -1 to signify unknown dims. Old models don't have this and use Shape instead.
            output_shapes[name] = tensor.ShapeAsNumpy().tolist()
        else:
            output_shapes[name] = tensor.ShapeSignatureAsNumpy().tolist()
        buf = model.Buffers(tensor.Buffer())
        dtypes[name] = map_tflite_dtype_to_onnx(tensor.Type())
        if not buf.DataIsNone() and tensor.Buffer() > 0:
            # For const values we use TF to decode the binary data from the buffer
            t = tensor_pb2.TensorProto()
            t.tensor_content = buf.DataAsNumpy().tobytes()
            if output_shapes[name] is None:
                output_shapes[name] = []
            for d in output_shapes[name]:
                t.tensor_shape.dim.add().size = d
            t.dtype = map_tflite_dtype_to_tf(tensor.Type())
            if t.dtype == tf.string:
                onnx_tensor = parse_tflite_string_tensor(
                    t.tensor_content, output_shapes[name])
            else:
                np_data = tensor_util.MakeNdarray(t)
                onnx_tensor = numpy_helper.from_array(np_data, name=name)
            onnx_node = helper.make_node("Const", [],
                                         outputs=[name],
                                         name=name,
                                         value=onnx_tensor)
            onnx_nodes.append(onnx_node)
            op_cnt["Const"] += 1

    def get_dequant(tensor_name):
        """Creates a dequantize op for the provided tensor if needed and returns the output of the op, or
        the original tensor name if no dequantization is needed"""
        quant = name_to_tensor[tensor_name].Quantization()
        if quant is None or quant.ScaleIsNone() or quant.ZeroPointIsNone():
            return tensor_name
        if tensor_name in tensor_name_to_dequant_output:
            return tensor_name_to_dequant_output[tensor_name]
        dequant_name = tensor_name + "_dequant"
        attr = get_quantization_attr(quant)
        onnx_node = helper.make_node("TFL_DEQUANTIZE", [tensor_name],
                                     [dequant_name],
                                     name=dequant_name,
                                     **attr)
        onnx_nodes.append(onnx_node)
        tensor_name_to_dequant_output[tensor_name] = dequant_name
        output_shapes[dequant_name] = output_shapes[tensor_name].copy()
        dtypes[dequant_name] = onnx_pb.TensorProto.FLOAT
        return dequant_name

    def get_prequant(tensor_name):
        """Called by nodes with the name of the tensor they must output.
        If the output is supposed to be quantized, creates a Quantize op outputting the tensor.
        Returns the name that should be used for the "prequantized" tensor, or the original tensor if no quantization
        is needed"""
        quant = name_to_tensor[tensor_name].Quantization()
        if quant is None or quant.ScaleIsNone() or quant.ZeroPointIsNone():
            return tensor_name
        prequant_name = tensor_name + "_prequant"
        quantize_name = tensor_name + "_quantize"
        attr = get_quantization_attr(quant)
        onnx_node = helper.make_node("TFL_QUANTIZE", [prequant_name],
                                     [tensor_name],
                                     name=quantize_name,
                                     **attr)
        onnx_nodes.append(onnx_node)
        output_shapes[prequant_name] = output_shapes[tensor_name].copy()
        dtypes[prequant_name] = onnx_pb.TensorProto.FLOAT
        return prequant_name

    # Second pass: translate each tflite operator into an onnx-style node.
    for i in range(tflite_g.OperatorsLength()):
        op = tflite_g.Operators(i)
        optype = 'TFL_' + opcodes_map[op.OpcodeIndex()]
        op_cnt[optype] += 1
        attr = {}
        options_type_name = lookup_enum(op.BuiltinOptionsType(),
                                        'BuiltinOptions')
        option_class = get_options_class(options_type_name)
        wants_dequantized_input = True
        has_prequantized_output = True
        # QUANTIZE/DEQUANTIZE ops handle quantization themselves, so skip
        # the automatic wrapping on the corresponding side.
        if optype == 'TFL_QUANTIZE':
            out_tensor = tflite_g.Tensors(op.Outputs(0))
            quant = out_tensor.Quantization()
            has_prequantized_output = False
            if quant is not None and not quant.ScaleIsNone(
            ) and not quant.ZeroPointIsNone():
                attr.update(get_quantization_attr(quant))
        elif optype == 'TFL_DEQUANTIZE':
            in_tensor = tflite_g.Tensors(op.Inputs(0))
            quant = in_tensor.Quantization()
            wants_dequantized_input = False
            if quant is not None and not quant.ScaleIsNone(
            ) and not quant.ZeroPointIsNone():
                attr.update(get_quantization_attr(quant))
        # -1 marks an optional/absent tensor in tflite.
        input_names = [
            tensor_names[op.Inputs(i)] for i in range(op.InputsLength())
            if op.Inputs(i) != -1
        ]
        output_names = [
            tensor_names[op.Outputs(i)] for i in range(op.OutputsLength())
            if op.Outputs(i) != -1
        ]
        if optype.startswith("TFL_Flex"):
            # Flex ops embed a serialized TF NodeDef in their custom options.
            data = read_flexbuffer(op.CustomOptionsAsNumpy().tobytes(),
                                   decode_strings=False)
            utils.make_sure(
                isinstance(data, list),
                "Flex ops are expected to store data as a flexbuffer list")
            tf_op = data[0].decode("utf-8")
            tf_node_def = node_def_pb2.NodeDef()
            tf_node_def.ParseFromString(data[1])
            input_tf_dtypes = [
                map_tflite_dtype_to_tf(name_to_tensor[inp].Type())
                for inp in input_names
            ]

            def shape_to_tf_shape(dims):
                return [None if d < 0 else d
                        for d in dims] if dims is not None else None

            input_shapes = [
                shape_to_tf_shape(output_shapes[inp]) for inp in input_names
            ]
            tf_attrs, _ = read_tf_node_def_attrs(tf_node_def, input_tf_dtypes,
                                                 input_shapes)
            attr.update(tf_attrs)
            optype = tf_op
        elif not op.CustomOptionsIsNone():
            custom_ops_format = lookup_enum(op.CustomOptionsFormat(),
                                            'CustomOptionsFormat')
            if custom_ops_format == 'FLEXBUFFERS':
                data = None
                try:
                    data = read_flexbuffer(op.CustomOptionsAsNumpy().tobytes())
                except Exception as e:  # pylint: disable=broad-except
                    logger.warning(
                        "Could not parse attributes for custom op '%s': %s",
                        optype, e)
                if isinstance(data, dict):
                    attr.update(data)
        if option_class is not None:
            # Read the op's builtin-options attributes via flatbuffer
            # reflection on the generated options class.
            options = option_class()
            options.Init(op.BuiltinOptions().Bytes, op.BuiltinOptions().Pos)
            # All flatbuffer objects have these properties.
            block_list = [
                options_type_name + 'BufferHasIdentifier', 'Init',
                'GetRootAs' + options_type_name
            ]
            # The rest of the properties of the options class provide its attribute names
            attr_names = {
                opt
                for opt in dir(options)
                if not opt.startswith('_') and opt not in block_list
            }
            for a in list(attr_names):
                # Flatbufffer list properties have 3 functions: *Length, *IsNone, and *AsNumpy
                if a + 'Length' in attr_names:
                    attr_names.remove(a + 'Length')
                    attr_names.remove(a + 'IsNone')
                    attr_names.remove(a)
            for a in attr_names:
                if a.endswith('AsNumpy'):
                    value = getattr(options, a)().tolist()
                    a = a[:-len('AsNumpy')]
                else:
                    # For enums we use a string with the value name, not enum index
                    value = getattr(options, a)()
                    if a in NODE_ATTR_NAME_TO_ENUM_TYPE:
                        value = lookup_enum(value,
                                            NODE_ATTR_NAME_TO_ENUM_TYPE[a])
                    elif a in FUNCTION_ATTRS:
                        value = model.Subgraphs(value).Name().decode()
                attr_cnt[a] += 1
                attr[proper_to_snake_case(a)] = value
        if wants_dequantized_input:
            input_names = [get_dequant(inp) for inp in input_names]
        if optype == "TFL_TFLite_Detection_PostProcess":
            # There's a bug in tflite for the output shapes of this op
            for out, shape in zip(output_names,
                                  [[-1, -1, 4], [-1, -1], [-1, -1], [-1]]):
                if len(output_shapes[out]) != len(shape):
                    output_shapes[out] = shape
        if all(output_shapes[out] == [] for out in output_names):
            # tflite uses [] to represent both scalars and completely unknown shapes
            # If an op has non-scalar inputs and all scalar outputs, it is very likely the shapes are actually unknown.
            inp_shapes = [output_shapes[inp] for inp in input_names]
            if not all(s == [] for s in inp_shapes):
                if any(s is None
                       for s in inp_shapes) or not op_has_scalar_output(
                           inp_shapes, optype, attr):
                    for out in output_names:
                        logger.warning(
                            "Replacing scalar output shape of %s with unknown shape",
                            out)
                        output_shapes[out] = None
        if has_prequantized_output:
            output_names = [get_prequant(out) for out in output_names]
        onnx_node = helper.make_node(optype,
                                     input_names,
                                     output_names,
                                     name=output_names[0],
                                     **attr)
        onnx_nodes.append(onnx_node)

    inputs = [
        tensor_names[tflite_g.Inputs(i)]
        for i in range(tflite_g.InputsLength())
    ]
    outputs = [
        tensor_names[tflite_g.Outputs(i)]
        for i in range(tflite_g.OutputsLength())
    ]
    # TODO: Allow input/outputs to be overridden

    # Emit a Placeholder node for each graph input.
    for inp in inputs:
        onnx_node = helper.make_node("Placeholder", [],
                                     outputs=[inp],
                                     name=inp)
        onnx_nodes.append(onnx_node)

    graph_name = (tflite_g.Name() or b'tflite graph').decode()
    return onnx_nodes, op_cnt, attr_cnt, output_shapes, dtypes, inputs, outputs, graph_name
Ejemplo n.º 29
0
def tf_to_hls(yamlConfig):
    """Translate a frozen TensorFlow graph (.pb) into an HLSModel.

    Loads the protobuf pointed to by ``yamlConfig['TensorFlowModel']``,
    imports it into a fresh TF graph, converts every supported operation
    into a layer dictionary, then builds and optimizes an HLSModel.

    Args:
        yamlConfig: dict-like configuration; must contain the key
            'TensorFlowModel' with the path to the frozen .pb file.

    Returns:
        The optimized HLSModel.

    Raises:
        Exception: if the file is missing, the graph fails to load or
            import, or the graph contains an unsupported operation type.
        NotImplementedError: for ResizeNearestNeighbor ops that use
            align_corners=True or half_pixel_centers=True.
    """

    # This is a list of dictionaries to hold all the layer info we need
    # to generate HLS.
    layer_list = []

    if not os.path.exists(yamlConfig['TensorFlowModel']):
        raise Exception('The specified file does not exist: {}'.format(
            yamlConfig['TensorFlowModel']))

    graph_def = None
    graph = None

    # Extract model architecture from the serialized GraphDef (.pb).
    try:
        with tf.io.gfile.GFile(yamlConfig['TensorFlowModel'], "rb") as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())
    except BaseException as e:
        raise Exception('Error loading the graph definition: {}'.format(
            str(e)))

    try:
        assert graph_def is not None
        with tf.Graph().as_default() as graph:
            tf.import_graph_def(graph_def,
                                input_map=None,
                                return_elements=None,
                                name='',
                                producer_op_list=None)
    except BaseException as e:
        raise Exception('Error importing the graph: {}'.format(str(e)))

    # Define supported operations, grouped by TF op category.
    array_ops = ['ConcatV2', 'StridedSlice', 'Transpose']
    core_ops = ['Const', 'Identity', 'Placeholder']
    image_ops = ['ResizeNearestNeighbor']
    math_ops = ['Add', 'MatMul', 'Mul', 'Sigmoid']
    nn_ops = [
        'AvgPool', 'BiasAdd', 'Conv2D', 'Elu', 'FusedBatchNorm', 'MaxPool',
        'Relu', 'Selu', 'Softmax'
    ]
    supported_ops = array_ops + core_ops + image_ops + math_ops + nn_ops

    input_layers = []
    output_layers = _find_graph_outputs(graph)

    # First pass: fail fast on any unsupported layer type before we start
    # emitting partial results.
    output_shape = None
    for tf_op in graph.get_operations():
        if tf_op.type not in supported_ops:
            raise Exception('ERROR: Unsupported layer type: {}'.format(
                tf_op.type))

    print('Topology:')
    for tf_op in graph.get_operations():
        handled = False

        layer = {}
        layer['name'] = tf_op.name

        if tf_op.type == 'Placeholder':
            if len(tf_op.inputs) == 0:  # Input
                output_shape = tf_op.outputs[0].shape.as_list()
                layer['class_name'] = 'InputLayer'
                # Drop the batch dimension; HLS layers carry per-sample shapes.
                layer['input_shape'] = output_shape[1:]
                layer['outputs'] = _parse_tensor_names(tf_op.outputs)
                input_layers.append(layer['name'])
                handled = True

        elif tf_op.type == 'Const' or tf_op.type == 'Identity':
            # Nothing to do here, TFDataReader handles these
            handled = True
            continue

        elif tf_op.type == 'MatMul':
            input_shape = tf_op.inputs[0].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['class_name'] = 'Dense'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            layer['n_in'] = input_shape[-1]
            layer['n_out'] = output_shape[-1]
            handled = True

        elif tf_op.type == 'BiasAdd':
            input_shape = tf_op.inputs[0].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['class_name'] = 'BiasAdd'
            layer['op'] = 'Add'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            handled = True

        elif tf_op.type in ['Elu', 'Relu', 'Selu', 'Sigmoid', 'Softmax']:
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['class_name'] = 'Activation'
            layer['activation'] = tf_op.type
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            handled = True

        elif tf_op.type == 'Conv2D':
            input_shape = tf_op.inputs[0].shape.as_list()
            weights_shape = tf_op.inputs[1].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['data_format'], c_idx, h_idx, w_idx = _parse_data_format(
                tf_op.get_attr('data_format').decode())
            dilations = tf_op.get_attr('dilations')
            strides = tf_op.get_attr('strides')

            layer['class_name'] = 'Conv2D'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])

            layer['n_chan'] = input_shape[c_idx]
            layer['in_height'] = input_shape[h_idx]
            layer['in_width'] = input_shape[w_idx]

            # weights_shape = (filter_height, filter_width, n_channels, n_filters)
            layer['filt_height'] = weights_shape[0]
            layer['filt_width'] = weights_shape[1]
            layer['n_chan'] = weights_shape[2]
            layer['n_filt'] = weights_shape[3]

            layer['stride_height'] = strides[h_idx]
            layer['stride_width'] = strides[w_idx]
            layer['dilation_height'] = dilations[h_idx]
            layer['dilation_width'] = dilations[w_idx]

            layer['padding'] = tf_op.get_attr('padding').decode().lower()
            in_height = input_shape[h_idx]
            in_width = input_shape[w_idx]
            _compute_pads_2d(layer, in_height, in_width)

            handled = True

        elif tf_op.type == 'MaxPool':
            input_shape = tf_op.inputs[0].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['data_format'], c_idx, h_idx, w_idx = _parse_data_format(
                tf_op.get_attr('data_format').decode())
            strides = tf_op.get_attr('strides')
            kernel_size = tf_op.get_attr('ksize')

            layer['class_name'] = 'MaxPooling2D'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])

            layer['padding'] = tf_op.get_attr('padding').decode().lower()

            layer['in_height'] = input_shape[h_idx]
            layer['in_width'] = input_shape[w_idx]
            layer['n_filt'] = input_shape[c_idx]

            layer['stride_height'] = strides[h_idx]
            layer['stride_width'] = strides[w_idx]
            layer['filt_height'] = layer['pool_height'] = kernel_size[h_idx]
            layer['filt_width'] = layer['pool_width'] = kernel_size[w_idx]

            in_height = input_shape[h_idx]
            in_width = input_shape[w_idx]
            _compute_pads_2d(layer, in_height, in_width)

            handled = True

        elif tf_op.type == 'FusedBatchNorm':
            input_shape = tf_op.inputs[0].shape.as_list()
            output_shape = tf_op.outputs[0].shape.as_list()

            layer['class_name'] = 'BatchNormalization'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            layer['data_format'], c_idx, h_idx, w_idx = _parse_data_format(
                tf_op.get_attr('data_format').decode())
            layer['n_in'] = np.prod(input_shape[1:])
            layer['epsilon'] = tf_op.get_attr('epsilon')

            # For rank < 4 inputs there is no channel axis to report.
            if len(input_shape) < 4:
                layer['n_filt'] = -1
            else:
                layer['n_filt'] = input_shape[c_idx]

            handled = True

        elif tf_op.type == 'ConcatV2':
            layer['class_name'] = 'Concatenate'
            # Last input of ConcatV2 is the axis tensor, not a data input.
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[:-1])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            output_shape = tf_op.outputs[0].shape.as_list()

            rank = tf_op.get_attr('N')
            if rank != 2:
                raise Exception(
                    'Unsupported number of inputs in Concat operation')

            layer['op'] = layer['class_name'].lower() + '{}d'.format(rank)
            # Read the concat axis straight out of the const node. Urgh!
            layer['axis'] = tf_op.inputs[2].op.node_def.attr[
                'value'].tensor.int_val[0]

            handled = True

        elif tf_op.type in ['Add', 'Mul']:
            layer['class_name'] = 'Merge'
            layer['inputs'] = _parse_tensor_names(list(tf_op.inputs))
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            output_shape = tf_op.outputs[0].shape.as_list()

            layer['op'] = tf_op.type.lower()
            if layer['op'] == 'mul':
                layer['op'] = 'multiply'

            handled = True

        elif tf_op.type == 'Transpose':
            layer['class_name'] = 'Transpose'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])
            # Permutation comes from the second (const) input's tensor value.
            layer['perm'] = tensor_util.MakeNdarray(
                tf_op.inputs[1].op.node_def.attr['value'].tensor).tolist()
            output_shape = tf_op.outputs[0].shape.as_list()

            handled = True

        elif tf_op.type == 'ResizeNearestNeighbor':
            layer['class_name'] = 'Resize'
            layer['algorithm'] = 'nearest'
            layer['inputs'] = _parse_tensor_names(tf_op.inputs[0])
            layer['outputs'] = _parse_tensor_names(tf_op.outputs[0])

            input_shape = tf_op.inputs[0].shape.as_list()  # (B, H, W, C)
            output_shape = tf_op.outputs[0].shape.as_list()
            layer['height'] = input_shape[1]
            layer['width'] = input_shape[2]
            layer['n_chan'] = input_shape[3]
            layer['new_height'] = output_shape[1]
            layer['new_width'] = output_shape[2]

            # Check for currently unsupported operations
            align_corners = tf_op.get_attr('align_corners')
            if align_corners:
                raise NotImplementedError(
                    'Property "align_corners=True" is not supported.')
            # BUG FIX: previously re-read 'align_corners' here, so
            # half_pixel_centers=True graphs were silently accepted.
            half_pixel_centers = tf_op.get_attr('half_pixel_centers')
            if half_pixel_centers:
                raise NotImplementedError(
                    'Property "half_pixel_centers=True" is not supported.')

            handled = True

        if not handled:
            raise Exception('Unable to parse operation: {} - {}'.format(
                tf_op.type, tf_op.name))

        print('Layer name: {}, layer type: {}, current shape: {}'.format(
            layer['name'], layer['class_name'], output_shape))
        layer_list.append(layer)

    #################
    ## Generate HLS
    #################

    reader = TFDataReader(graph)
    print('Creating HLS model')
    hls_model = HLSModel(yamlConfig, reader, layer_list, input_layers,
                         output_layers)
    optimizers = [
        'eliminate_linear_activation', 'merge_batch_norm_quantized_tanh',
        'quantize_dense_output', 'fuse_biasadd', 'fuse_dense_batch_norm'
    ]
    optimize_model(hls_model, optimizers)
    return hls_model
Ejemplo n.º 30
0
    def apply_matmul_biasadd_relu_fusion(self, match_node_name):
        """Fuse a matched MatMul -> BiasAdd -> Relu chain into a single
        QuantizedMatMulWithBiasAndRelu node in the output graph.

        Args:
            match_node_name: list of node names for the matched pattern;
                index 0 is the MatMul, index 1 the BiasAdd, index 2 the Relu.

        Returns:
            match_node_name when the fusion was applied, or [] when the
            fusion is skipped (in which case self.output_graph is set to
            the unmodified self.input_graph).
        """
        # Every matched node except the MatMul itself is dropped from the
        # output graph (the fused node replaces them).
        skip_node_name = match_node_name[1:]
        matched_node = self.node_name_mapping[match_node_name[0]]
        control_inputs, normal_inputs = self._get_node_input(
            matched_node.node.name)
        # Second input of MatMul is the weight tensor.
        weight_name = normal_inputs[1]
        weight_node = self.node_name_mapping[helper.node_name_from_input(
            weight_name)].node

        # FIXME We only quantize the MatMul op which second input node type is const. This is a
        # workaround for RNN model like LTSM.
        if weight_node.op != 'Const':
            self.output_graph = self.input_graph
            return []

        weights_content = tensor_util.MakeNdarray(
            weight_node.attr['value'].tensor)

        # Skip fusion if the weights contain NaN -- quantizing them would
        # produce garbage min/max ranges.
        if np.any(np.isnan(weights_content)):
            self.output_graph = self.input_graph
            return []

        # Skip fusion if the weight const feeds any other node: quantizing
        # it in place would corrupt those other consumers.
        for i in self.node_name_mapping:
            if weight_node.name in self.node_name_mapping[i].output:
                self.output_graph = self.input_graph
                return []

        # NOTE(review): the lookup below uses the raw weight_name, while the
        # lookup above normalized it with helper.node_name_from_input --
        # verify these agree when weight_name carries a ':0'-style suffix.
        q_weights_name, q_weights_min_name, q_weights_max_name = \
            self._intel_cpu_quantize_weight_eightbit(
                matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel)

        skip_node_name.append(weight_name)

        # Rebuild the graph: emit the fused quantized node in place of the
        # MatMul, drop skipped nodes, and copy everything else verbatim.
        for _, node in enumerate(self.input_graph.node):
            if node.name in skip_node_name:
                pass
            elif node.name == match_node_name[0]:
                self.logger.debug("matched node {} with input {}".format(
                    node.name, node.input))

                self.logger.debug("apply_matmul_biasadd_relu_fusion")

                quantized_node_name = node.name + "_eightbit_quantized_mat_mul"
                # Bias comes from the BiasAdd node's second input.
                bias_node_name = self.node_name_mapping[
                    match_node_name[1]].node.input[1]
                relu_node_name = match_node_name[2]
                all_input_names = self._add_eightbit_prologue_nodes(
                    matched_node.node.name)
                # Splice the quantized weight (and its min/max) after the
                # activation input, then the bias after the first two inputs,
                # to match the fused op's expected input order.
                all_input_names = all_input_names[:1] + [
                    q_weights_name
                ] + all_input_names[1:]
                all_input_names.append(q_weights_min_name)
                all_input_names.append(q_weights_max_name)
                quantized_node_input_names = all_input_names[:2] + [
                    bias_node_name
                ] + all_input_names[2:] + control_inputs

                quantized_matmul_node = helper.create_node(
                    "QuantizedMatMulWithBiasAndRelu", quantized_node_name,
                    quantized_node_input_names)

                # Preserve the transpose flags from the original MatMul.
                helper.copy_attr(quantized_matmul_node, "transpose_a",
                                 node.attr["transpose_a"])
                helper.copy_attr(quantized_matmul_node, "transpose_b",
                                 node.attr["transpose_b"])
                # quint8 activations, qint8 weights, qint32 accumulator.
                helper.set_attr_dtype(quantized_matmul_node, "T1",
                                      dtypes.quint8)
                helper.set_attr_dtype(quantized_matmul_node, "T2",
                                      dtypes.qint8)
                helper.set_attr_dtype(quantized_matmul_node, "Toutput",
                                      dtypes.qint32)
                helper.set_attr_string(
                    quantized_matmul_node, 'input_quant_mode',
                    b'MIN_FIRST' if self.is_asymmetric else b'SCALED')

                self.add_output_graph_node(quantized_matmul_node)

                # Requantize the qint32 result down to quint8 and dequantize,
                # wiring the result under the original Relu's name so
                # downstream consumers are untouched.
                quantize_down_name = self._add_quantize_down_nodes(
                    node, quantized_node_name, dtypes.quint8, False)
                self._intel_cpu_add_dequantize_result_node(
                    quantize_down_name, relu_node_name)
            else:
                # Unrelated node: copy it into the output graph unchanged.
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                self.add_output_graph_node(new_node)
        return match_node_name