def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      caption_annotations=None,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    image_dir: directory containing the image files.
    bbox_annotations:
      list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
        u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
        coordinates in the official COCO dataset are given as [x, y, width,
        height] tuples using absolute coordinates where x, y represent the
        top-left (0-indexed) corner.  This function converts to the format
        expected by the Tensorflow Object Detection API (which is which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
    category_index: a dict containing COCO category information keyed by the
      'id' field of each category.  See the label_map_util.create_category_index
      function.
    caption_annotations:
      list of dict with keys: [u'id', u'image_id', u'str'].
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        'image/height':
        tfrecord_util.int64_feature(image_height),
        'image/width':
        tfrecord_util.int64_feature(image_width),
        'image/filename':
        tfrecord_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
        tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        tfrecord_util.bytes_feature(encoded_jpg),
        'image/format':
        tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
    }

    num_annotations_skipped = 0
    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        encoded_mask_png = []
        for object_annotations in bbox_annotations:
            (x, y, width, height) = tuple(object_annotations['bbox'])
            if width <= 0 or height <= 0:
                num_annotations_skipped += 1
                continue
            if x + width > image_width or y + height > image_height:
                num_annotations_skipped += 1
                continue
            xmin.append(float(x) / image_width)
            xmax.append(float(x + width) / image_width)
            ymin.append(float(y) / image_height)
            ymax.append(float(y + height) / image_height)
            is_crowd.append(object_annotations['iscrowd'])
            category_id = int(object_annotations['category_id'])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]['name'].encode('utf8'))
            area.append(object_annotations['area'])

            if include_masks:
                run_len_encoding = mask.frPyObjects(
                    object_annotations['segmentation'], image_height,
                    image_width)
                binary_mask = mask.decode(run_len_encoding)
                if not object_annotations['iscrowd']:
                    binary_mask = np.amax(binary_mask, axis=2)
                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format='PNG')
                encoded_mask_png.append(output_io.getvalue())
        feature_dict.update({
            'image/object/bbox/xmin':
            tfrecord_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            tfrecord_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            tfrecord_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            tfrecord_util.float_list_feature(ymax),
            'image/object/class/text':
            tfrecord_util.bytes_list_feature(category_names),
            'image/object/class/label':
            tfrecord_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
            tfrecord_util.int64_list_feature(is_crowd),
            'image/object/area':
            tfrecord_util.float_list_feature(area),
        })
        if include_masks:
            feature_dict['image/object/mask'] = (
                tfrecord_util.bytes_list_feature(encoded_mask_png))
    if caption_annotations:
        captions = []
        for caption_annotation in caption_annotations:
            captions.append(caption_annotation['caption'].encode('utf8'))
        feature_dict.update(
            {'image/caption': tfrecord_util.bytes_list_feature(captions)})

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
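
# A minimal usage sketch for create_tf_example, assuming a COCO-style
# instances JSON; the annotation path and image directory below are
# hypothetical and only illustrate the expected inputs.
def _demo_create_tf_example():
    import collections
    import json
    with tf.gfile.GFile('annotations/instances_val2017.json') as f:  # hypothetical path
        groundtruth = json.load(f)
    category_index = {c['id']: c for c in groundtruth['categories']}
    # Group annotations by image id, as create_tf_example expects per-image lists.
    anns_by_image = collections.defaultdict(list)
    for ann in groundtruth['annotations']:
        anns_by_image[ann['image_id']].append(ann)
    for img in groundtruth['images']:
        key, example, num_skipped = create_tf_example(
            img, 'val2017',  # hypothetical image directory
            bbox_annotations=anns_by_image[img['id']],
            category_index=category_index, include_masks=False)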
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages',
                       ann_json_dict=None):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by running
      tfrecord_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integer ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    image_subdirectory: String specifying subdirectory within the PASCAL dataset
      directory holding the actual image data.
    ann_json_dict: Optional COCO-style annotation dict; if provided, image and
      annotation entries are appended to it.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
  full_path = os.path.join(dataset_directory, img_path)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])
  image_id = get_image_id(data['filename'])
  if ann_json_dict:
    image = {
        'file_name': data['filename'],
        'height': height,
        'width': width,
        'id': image_id,
    }
    ann_json_dict['images'].append(image)

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  area = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  if 'object' in data:
    for obj in data['object']:
      difficult = bool(int(obj['difficult']))
      if ignore_difficult_instances and difficult:
        continue

      difficult_obj.append(int(difficult))

      xmin.append(float(obj['bndbox']['xmin']) / width)
      ymin.append(float(obj['bndbox']['ymin']) / height)
      xmax.append(float(obj['bndbox']['xmax']) / width)
      ymax.append(float(obj['bndbox']['ymax']) / height)
      area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
      classes_text.append(obj['name'].encode('utf8'))
      classes.append(label_map_dict[obj['name']])
      truncated.append(int(obj['truncated']))
      poses.append(obj['pose'].encode('utf8'))

      if ann_json_dict:
        abs_xmin = int(obj['bndbox']['xmin'])
        abs_ymin = int(obj['bndbox']['ymin'])
        abs_xmax = int(obj['bndbox']['xmax'])
        abs_ymax = int(obj['bndbox']['ymax'])
        abs_width = abs_xmax - abs_xmin
        abs_height = abs_ymax - abs_ymin
        ann = {
            'area': abs_width * abs_height,
            'iscrowd': 0,
            'image_id': image_id,
            'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
            'category_id': label_map_dict[obj['name']],
            'id': get_ann_id(),
            'ignore': 0,
            'segmentation': [],
        }
        ann_json_dict['annotations'].append(ann)

  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/height':
                  tfrecord_util.int64_feature(height),
              'image/width':
                  tfrecord_util.int64_feature(width),
              'image/filename':
                  tfrecord_util.bytes_feature(data['filename'].encode('utf8')),
              'image/source_id':
                  tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
              'image/key/sha256':
                  tfrecord_util.bytes_feature(key.encode('utf8')),
              'image/encoded':
                  tfrecord_util.bytes_feature(encoded_jpg),
              'image/format':
                  tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
              'image/object/bbox/xmin':
                  tfrecord_util.float_list_feature(xmin),
              'image/object/bbox/xmax':
                  tfrecord_util.float_list_feature(xmax),
              'image/object/bbox/ymin':
                  tfrecord_util.float_list_feature(ymin),
              'image/object/bbox/ymax':
                  tfrecord_util.float_list_feature(ymax),
              'image/object/area':
                  tfrecord_util.float_list_feature(area),
              'image/object/class/text':
                  tfrecord_util.bytes_list_feature(classes_text),
              'image/object/class/label':
                  tfrecord_util.int64_list_feature(classes),
              'image/object/difficult':
                  tfrecord_util.int64_list_feature(difficult_obj),
              'image/object/truncated':
                  tfrecord_util.int64_list_feature(truncated),
              'image/object/view':
                  tfrecord_util.bytes_list_feature(poses),
          }))
  return example
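
# A minimal usage sketch for dict_to_tf_example, assuming the standard PASCAL
# VOC layout; the paths below are hypothetical. recursive_parse_xml_to_dict is
# the tfrecord_util helper referenced in the docstring above.
def _demo_dict_to_tf_example(label_map_dict):
  from lxml import etree
  ann_path = 'VOCdevkit/VOC2012/Annotations/2007_000027.xml'  # hypothetical path
  with tf.gfile.GFile(ann_path, 'r') as fid:
    xml = etree.fromstring(fid.read())
  # The VOC XML wraps everything in an <annotation> element.
  data = tfrecord_util.recursive_parse_xml_to_dict(xml)['annotation']
  return dict_to_tf_example(data, 'VOCdevkit', label_map_dict)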
def json_to_tf_example(example, label_map_dict, ann_json_dict=None):
    """Converts a labelme-style JSON annotation file to a tf.Example proto.

    Args:
      example: path to the JSON annotation file; the image is expected to be a
        JPEG with the same basename.
      label_map_dict: a map from string label names to integer ids; labels not
        present in this map are skipped.
      ann_json_dict: optional COCO-style annotation dict; if provided, image
        and annotation entries are appended to it.

    Returns:
      tf_example: the converted tf.Example.

    Raises:
      ValueError: if the image is not a valid JPEG.
    """
    img_path = os.path.splitext(example)[0] + '.jpg'
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    # SHA-256 checksum of the encoded image.
    key = hashlib.sha256(encoded_jpg).hexdigest()
    width = image.width
    height = image.height

    with open(example, 'r') as f:
        data = json.load(f)

    image_id = get_image_id(data['imagePath'])
    if ann_json_dict:
        image = {
            'file_name': data['imagePath'],
            'height': height,
            'width': width,
            'id': image_id,
        }
        ann_json_dict['images'].append(image)

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    # truncated = []
    # poses = []
    # difficult_obj = []

    if 'shapes' in data:
        for obj in data['shapes']:
            # Skip labels that do not appear in the .names-derived label map.
            if obj['label'] not in label_map_dict:
                continue

            # Convert the bounding box to normalized coordinates.
            x0, y0, x1, y1 = _get_boundingbox(obj['points'])
            xmin.append(x0 / width)
            ymin.append(y0 / height)
            xmax.append(x1 / width)
            ymax.append(y1 / height)
            # Encode class labels as UTF-8 so that Chinese labels are supported.
            classes_text.append(obj['label'].encode('utf-8'))
            classes.append(label_map_dict[obj['label']])

            if ann_json_dict:
                abs_xmin = int(x0)
                abs_ymin = int(y0)
                abs_xmax = int(x1)
                abs_ymax = int(y1)
                abs_width = abs_xmax - abs_xmin
                abs_height = abs_ymax - abs_ymin
                ann = {
                    'area': abs_width * abs_height,
                    'iscrowd': 0,
                    'image_id': image_id,
                    'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                    'category_id': label_map_dict[obj['label']],
                    'id': get_ann_id(),
                    'ignore': 0,
                    'segmentation': [],
                }
                ann_json_dict['annotations'].append(ann)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            tfrecord_util.int64_feature(height),
            'image/width':
            tfrecord_util.int64_feature(width),
            'image/filename':
            tfrecord_util.bytes_feature(data['imagePath'].encode('utf8')),
            'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
            'image/key/sha256':
            tfrecord_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            tfrecord_util.bytes_feature(encoded_jpg),
            'image/format':
            tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            tfrecord_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            tfrecord_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            tfrecord_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            tfrecord_util.float_list_feature(ymax),
            'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
        }))

    return tf_example
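
# A minimal usage sketch for json_to_tf_example, assuming labelme-style JSON
# annotation files stored next to their JPEG images; the glob pattern and
# output filename are hypothetical.
def _demo_json_to_tf_example(label_map_dict):
    import glob
    with tf.python_io.TFRecordWriter('train.tfrecord') as writer:  # hypothetical output
        for json_path in glob.glob('annotations/*.json'):  # hypothetical pattern
            tf_example = json_to_tf_example(json_path, label_map_dict)
            writer.write(tf_example.SerializeToString())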
  def transfer_one_block(self, tfrecord_fn, block_sampled_datas, raw_vertex_num):
    from tfrecord_util import bytes_feature, int64_feature

    if not hasattr(self, 'eles_sorted'):
      self.sort_eles(block_sampled_datas.keys())
    #*************************************************************************
    dls = {}
    for item in self.eles_sorted:
      dls[item] = []
      for e in self.eles_sorted[item]:
        data = block_sampled_datas[e]
        dls[item].append( data )
      if len(dls[item]) > 0:
        dls[item] = np.concatenate(dls[item], -1)  # dtype auto-converts here if needed

    #*************************************************************************
    # fix face_i shape
    if 'face_i' in dls:
      face_shape = dls['face_i'].shape
      tile_num = self.num_face - face_shape[0]
      assert tile_num>=0, "face num > buffer: {}>{}".format(face_shape[0], self.num_face)
      tmp = np.tile( dls['face_i'][0:1,:], [tile_num, 1])
      #tmp = np.ones([self.num_face - face_shape[0], face_shape[1]], np.int32) * (-777)
      dls['face_i'] = np.concatenate([dls['face_i'], tmp], 0)

    #*************************************************************************
    # convert to example
    dls = Raw_To_Tfrecord.check_types(dls)
    if not hasattr(self, 'ele_idxs'):
      self.record_shape_idx(block_sampled_datas, dls)
      print(self.ele_idxs)

    max_category = np.max(ele_in_feature(dls, 'label_category', self.ele_idxs))
    assert max_category < self.dataset_meta.num_classes, \
        "max_category {} >= num_classes {}".format(
            max_category, self.dataset_meta.num_classes)

    vertex_f_bin = dls['vertex_f'].tobytes()
    #vertex_i_shape_bin = np.array(dls['vertex_i'].shape, np.int32).tobytes()
    if 'vertex_i' in dls:
      vertex_i_bin = dls['vertex_i'].tobytes()
    vertex_uint8_bin = dls['vertex_uint8'].tobytes()
    if 'face_i' in dls:
      face_i_bin = dls['face_i'].tobytes()


    features_map = {
      'vertex_f': bytes_feature(vertex_f_bin),
      'vertex_uint8': bytes_feature(vertex_uint8_bin),
    }
    if 'vertex_i' in dls:
      features_map['vertex_i'] = bytes_feature(vertex_i_bin)
    if 'face_i' in dls:
      features_map['face_i'] = bytes_feature(face_i_bin)
      features_map['valid_num_face'] = int64_feature(face_shape[0])

    example = tf.train.Example(features=tf.train.Features(feature=features_map))

    #*************************************************************************
    with tf.python_io.TFRecordWriter(tfrecord_fn) as raw_tfrecord_writer:
      raw_tfrecord_writer.write(example.SerializeToString())

    if self.fi % 5 == 0:
      print('{}/{} write tfrecord OK: {}'.format(self.fi, self.fn, tfrecord_fn))
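
# A minimal sketch of parsing one block back, matching the feature keys
# written by transfer_one_block above. The decoded dtypes (float32 vertices,
# int32 faces) are assumptions; the real shapes and dtypes are tracked in
# self.ele_idxs.
def _demo_parse_block(serialized_example):
  features = {
      'vertex_f': tf.FixedLenFeature([], tf.string),
      'vertex_uint8': tf.FixedLenFeature([], tf.string),
      'face_i': tf.FixedLenFeature([], tf.string),
      'valid_num_face': tf.FixedLenFeature([], tf.int64),
  }
  parsed = tf.parse_single_example(serialized_example, features)
  vertex_f = tf.decode_raw(parsed['vertex_f'], tf.float32)       # assumed dtype
  vertex_uint8 = tf.decode_raw(parsed['vertex_uint8'], tf.uint8)
  face_i = tf.decode_raw(parsed['face_i'], tf.int32)             # assumed dtype
  return vertex_f, vertex_uint8, face_i, parsed['valid_num_face']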
def create_tf_example(
    image,
    image_dir,
    bbox_annotations=None,
    category_index=None,
    caption_annotations=None,
):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
        u'width', u'date_captured', u'flickr_url', u'id']
      image_dir: directory containing the image files.
      bbox_annotations:
        list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
          u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
          coordinates in the official COCO dataset are given as [x, y, width,
          height] tuples using absolute coordinates where x, y represent the
          top-left (0-indexed) corner.  This function converts to the format
          expected by the Tensorflow Object Detection API (which is which is
          [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
          size).
      category_index: a dict containing COCO category information keyed by the
        'id' field of each category.  See the label_map_util.create_category_index
        function.
      caption_annotations:
        list of dict with keys: [u'id', u'image_id', u'str'].

    Returns:
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_height = image["height"]
    image_width = image["width"]
    filename = image["file_name"]
    image_id = image["id"]

    full_path = os.path.join(image_dir, filename)
    with tf.io.gfile.GFile(full_path, "rb") as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != "JPEG":
        raise ValueError("Image format not JPEG")
    key = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        "image/height":
        tfrecord_util.int64_feature(image_height),
        "image/width":
        tfrecord_util.int64_feature(image_width),
        "image/filename":
        tfrecord_util.bytes_feature(filename.encode("utf8")),
        "image/source_id":
        tfrecord_util.bytes_feature(str(image_id).encode("utf8")),
        "image/key/sha256":
        tfrecord_util.bytes_feature(key.encode("utf8")),
        "image/encoded":
        tfrecord_util.bytes_feature(encoded_jpg),
        "image/format":
        tfrecord_util.bytes_feature("jpeg".encode("utf8")),
    }

    num_annotations_skipped = 0
    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []

    if bbox_annotations:
        for object_annotations in bbox_annotations:
            (x, y, width, height) = tuple(object_annotations["bbox"])
            if width <= 0 or height <= 0:
                num_annotations_skipped += 1
                continue
            if x + width > image_width or y + height > image_height:
                num_annotations_skipped += 1
                continue
            xmin.append(float(x) / image_width)
            xmax.append(float(x + width) / image_width)
            ymin.append(float(y) / image_height)
            ymax.append(float(y + height) / image_height)
            is_crowd.append(object_annotations["iscrowd"])
            category_id = int(object_annotations["category_id"])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]["name"].encode("utf8"))
            area.append(object_annotations["area"])

    feature_dict.update({
        "image/object/bbox/xmin":
        tfrecord_util.float_list_feature(xmin),
        "image/object/bbox/xmax":
        tfrecord_util.float_list_feature(xmax),
        "image/object/bbox/ymin":
        tfrecord_util.float_list_feature(ymin),
        "image/object/bbox/ymax":
        tfrecord_util.float_list_feature(ymax),
        "image/object/class/text":
        tfrecord_util.bytes_list_feature(category_names),
        "image/object/class/label":
        tfrecord_util.int64_list_feature(category_ids),
        "image/object/is_crowd":
        tfrecord_util.int64_list_feature(is_crowd),
        "image/object/area":
        tfrecord_util.float_list_feature(area),
    })

    if caption_annotations:
        captions = []
        for caption_annotation in caption_annotations:
            captions.append(caption_annotation["caption"].encode("utf8"))
        feature_dict.update(
            {"image/caption": tfrecord_util.bytes_list_feature(captions)})

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
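
# A minimal sketch of decoding the example written above, using the tf.io
# namespace already used by this variant; the feature list is partial and
# serves only as an illustration.
def _demo_parse_coco_example(serialized_example):
    features = {
        "image/encoded": tf.io.FixedLenFeature([], tf.string),
        "image/object/bbox/xmin": tf.io.VarLenFeature(tf.float32),
        "image/object/bbox/ymin": tf.io.VarLenFeature(tf.float32),
        "image/object/bbox/xmax": tf.io.VarLenFeature(tf.float32),
        "image/object/bbox/ymax": tf.io.VarLenFeature(tf.float32),
        "image/object/class/label": tf.io.VarLenFeature(tf.int64),
    }
    parsed = tf.io.parse_single_example(serialized_example, features)
    image = tf.io.decode_jpeg(parsed["image/encoded"], channels=3)
    # Box coordinates and labels are variable-length, hence sparse after parsing.
    labels = tf.sparse.to_dense(parsed["image/object/class/label"])
    return image, labels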