Code example #1 (score: 0)
def create_tf_example(group, path):
    """Build a tf.train.Example for one image group (filename plus its boxes).

    Args:
      group: object with a `filename` attribute and an `object` DataFrame
        holding per-box columns 'xmin', 'xmax', 'ymin', 'ymax', 'class'.
      path: directory containing the image file named by `group.filename`.

    Returns:
      The assembled tf.train.Example.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode just enough to learn the image dimensions for normalization.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    # One entry per annotated object; box coordinates normalized to [0, 1].
    box_rows = [row for _, row in group.object.iterrows()]
    xmins = [row['xmin'] / width for row in box_rows]
    xmaxs = [row['xmax'] / width for row in box_rows]
    ymins = [row['ymin'] / height for row in box_rows]
    ymaxs = [row['ymax'] / height for row in box_rows]
    classes_text = [row['class'].encode('utf8') for row in box_rows]
    classes = [class_text_to_int(row['class']) for row in box_rows]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature('0'.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Code example #2 (score: 0)
File: dataloader_test.py — Project: swapnil3597/automl
 def _make_fake_tfrecord(self):
     """Write a temp TFRecord file holding one synthetic 512x512 example.

     Returns:
       Path to the '.tfrecords' file that was written.
     """
     tfrecord_path = os.path.join(tempfile.mkdtemp(), 'test.tfrecords')
     # NOTE(review): the writer is never closed or flushed explicitly;
     # the record appears to rely on destructor flushing — confirm.
     writer = tf.io.TFRecordWriter(tfrecord_path)
     # A constant all-ones uint8 image, JPEG-encoded to raw bytes.
     encoded_jpg = tf.io.encode_jpeg(tf.ones([512, 512, 3], dtype=tf.uint8))
     example = tf.train.Example(features=tf.train.Features(
         feature={
             'image/height':
             tfrecord_util.int64_feature(512),
             'image/width':
             tfrecord_util.int64_feature(512),
             'image/filename':
             tfrecord_util.bytes_feature('test_file_name.jpg'.encode(
                 'utf8')),
             'image/source_id':
             tfrecord_util.bytes_feature('123456'.encode('utf8')),
             'image/key/sha256':
             tfrecord_util.bytes_feature('qwdqwfw12345'.encode('utf8')),
             'image/encoded':
             tfrecord_util.bytes_feature(encoded_jpg.numpy()),
             'image/format':
             tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
             'image/object/bbox/xmin':
             tfrecord_util.float_list_feature([0.1]),
             'image/object/bbox/xmax':
             tfrecord_util.float_list_feature([0.1]),
             'image/object/bbox/ymin':
             tfrecord_util.float_list_feature([0.2]),
             'image/object/bbox/ymax':
             tfrecord_util.float_list_feature([0.2]),
             'image/object/class/text':
             tfrecord_util.bytes_list_feature(['test'.encode('utf8')]),
             'image/object/class/label':
             tfrecord_util.int64_list_feature([1]),
             # Empty lists below: the fake example carries no difficult /
             # truncated / view annotations.
             'image/object/difficult':
             tfrecord_util.int64_list_feature([]),
             'image/object/truncated':
             tfrecord_util.int64_list_feature([]),
             'image/object/view':
             tfrecord_util.bytes_list_feature([]),
         }))
     writer.write(example.SerializeToString())
     return tfrecord_path
Code example #3 (score: 0)
def create_tf_example(img_path,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      category_df=None,
                      caption_annotations=None,
                      include_masks=False):
    """Converts one image and its bbox annotations to a tf.Example proto.

    Args:
      img_path: path to the image file to encode.
      image_dir: directory containing the image files (currently unused; the
        image is read directly from img_path — kept for interface
        compatibility).
      bbox_annotations: list of dicts with keys "XMin", "XMax", "YMin",
        "YMax" and "LabelName" (an integer category id).  Coordinates appear
        to be pre-normalized to [0, 1] (Open Images style) since they are
        stored without dividing by the image size — TODO confirm.
      category_index: unused; kept for interface compatibility.
      category_df: unused; kept for interface compatibility.
      caption_annotations: unused; kept for interface compatibility.
      include_masks: Whether to include instance segmentation masks
        (PNG encoded) in the result; requires 'segmentation' and 'iscrowd'
        keys on each annotation. default: False.

    Returns:
      example: The converted tf.Example.
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored (currently always 0; nothing increments it).

    Raises:
      ValueError: if the image at img_path cannot be read/decoded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail loudly instead of crashing on the shape unpack below.
        raise ValueError('Unable to read image: {}'.format(img_path))
    height, width = img.shape[:2]
    image_height = height
    image_width = width
    filename = p.basename(img_path)
    image_id = _get_img_id(img_path)

    # Re-read the raw bytes so the record stores the original encoding.
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        'image/height':
        tfrecord_util.int64_feature(image_height),
        'image/width':
        tfrecord_util.int64_feature(image_width),
        'image/filename':
        tfrecord_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
        tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        tfrecord_util.bytes_feature(encoded_jpg),
        'image/format':
        tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
    }

    num_annotations_skipped = 0
    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        encoded_mask_png = []
        for object_annotations in bbox_annotations:
            xmin.append(object_annotations["XMin"])
            xmax.append(object_annotations["XMax"])
            ymin.append(object_annotations["YMin"])
            ymax.append(object_annotations["YMax"])
            is_crowd.append(False)
            category_id = int(object_annotations['LabelName'])
            category_ids.append(category_id)
            category_names.append(invClassDict[category_id].encode('utf8'))
            # Absolute pixel area recovered from the normalized box extents.
            area.append(
                height * width *
                (object_annotations["XMax"] - object_annotations["XMin"]) *
                (object_annotations["YMax"] - object_annotations["YMin"]))

            if include_masks:  # TODO: mask path untested (original TODO).
                run_len_encoding = mask.frPyObjects(
                    object_annotations['segmentation'], image_height,
                    image_width)
                binary_mask = mask.decode(run_len_encoding)
                if not object_annotations['iscrowd']:
                    # Collapse per-polygon channels into one binary mask.
                    binary_mask = np.amax(binary_mask, axis=2)
                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format='PNG')
                encoded_mask_png.append(output_io.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin':
            tfrecord_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            tfrecord_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            tfrecord_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            tfrecord_util.float_list_feature(ymax),
            'image/object/class/text':
            tfrecord_util.bytes_list_feature(category_names),
            'image/object/class/label':
            tfrecord_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
            tfrecord_util.int64_list_feature(is_crowd),
            'image/object/area':
            tfrecord_util.float_list_feature(area),
        })

        if include_masks:  # TODO: mask path untested (original TODO).
            feature_dict['image/object/mask'] = (
                tfrecord_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))

    return example, num_annotations_skipped
Code example #4 (score: 0)
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages',
                       ann_json_dict=None):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running tfrecord_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).  NOTE(review): currently unused — the
      difficult flag is hard-coded to 0 below.
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.
      NOTE(review): currently unused — the image path is built from
      dataset_directory and data['filename'] only.
    ann_json_dict: annotation json dictionary.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  # img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])

  full_path = os.path.join(dataset_directory,data['filename'])
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)

  image = PIL.Image.open(encoded_jpg_io)

  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  # Image size is taken from the decoded JPEG, not the XML 'size' element.
  # width = int(data['size']['width'])
  # height = int(data['size']['height'])
  width, height = image.size
  image_id = get_image_id(data['filename'])
  if ann_json_dict:
    image = {
        'file_name': data['filename'],
        'height': height,
        'width': width,
        'id': image_id,
    }
    ann_json_dict['images'].append(image)

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  # NOTE(review): the per-object XML parsing is disabled (commented out);
  # exactly one box per image is read from flat keys instead, and the
  # difficult flag is hard-coded off.
  # if 'object' in data:
  #   for obj in data['object']:
  # difficult = bool(int(obj['difficult']))
  difficult = bool(0)
  # if ignore_difficult_instances and difficult:
  #     continue

  difficult_obj.append(int(difficult))

  # Normalized box from flat keys 'x1'/'y1'/'x2'/'y2'.
  xmin.append(float(data['x1']) / width)
  ymin.append(float(data['y1']) / height)
  xmax.append(float(data['x2']) / width)
  ymax.append(float(data['y2']) / height)

  classes_text.append(data['class'].encode('utf8'))
  classes.append(label_map_dict[data['class']])
  truncated.append(int(0))
  poses.append(POSE.encode('utf8'))

  if ann_json_dict:
      # NOTE(review): the absolute box uses keys 'xmin'/'ymin'/'xmax'/'ymax'
      # while the normalized box above uses 'x1'/'y1'/'x2'/'y2' — confirm the
      # data dict actually carries both key sets, otherwise this branch
      # raises KeyError whenever ann_json_dict is provided.
      abs_xmin = int(data['xmin'])
      abs_ymin = int(data['ymin'])
      abs_xmax = int(data['xmax'])
      abs_ymax = int(data['ymax'])
      abs_width = abs_xmax - abs_xmin
      abs_height = abs_ymax - abs_ymin
      ann = {
          'area': abs_width * abs_height,
          'iscrowd': 0,
          'image_id': image_id,
          'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
          'category_id': label_map_dict[data['class']],
          'id': get_ann_id(),
          'ignore': 0,
          'segmentation': [],
      }
      ann_json_dict['annotations'].append(ann)

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': tfrecord_util.int64_feature(height),
      'image/width': tfrecord_util.int64_feature(width),
      'image/filename': tfrecord_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': tfrecord_util.bytes_feature(
          str(image_id).encode('utf8')),
      'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
      'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
      'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
      'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
      'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
      'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
      'image/object/class/text': tfrecord_util.bytes_list_feature(classes_text),
      'image/object/class/label': tfrecord_util.int64_list_feature(classes),
      'image/object/difficult': tfrecord_util.int64_list_feature(difficult_obj),
      'image/object/truncated': tfrecord_util.int64_list_feature(truncated),
      'image/object/view': tfrecord_util.bytes_list_feature(poses),
  }))
  return example
Code example #5 (score: 0)
def dict_to_tf_example(
    data,
    dataset_directory,
    label_map_dict,
    ignore_difficult_instances=False,
    image_subdirectory="JPEGImages",
    ann_json_dict=None,
):
    """Convert a parsed PASCAL-VOC XML dict into a tf.Example proto.

    Bounding-box coordinates are normalized by the width/height taken from
    the XML 'size' element.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running tfrecord_util.recursive_parse_xml_to_dict).
      dataset_directory: Path to root directory holding PASCAL dataset.
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).
      image_subdirectory: String specifying subdirectory within the PASCAL
        dataset directory holding the actual image data.
      ann_json_dict: optional COCO-style annotation dict, appended in place.

    Returns:
      example: The converted tf.Example.
    """
    relative_path = os.path.join(image_subdirectory, data["filename"])
    full_path = os.path.join(dataset_directory, relative_path)
    with tf.gfile.GFile(full_path, "rb") as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    # JPEG-format validation is intentionally disabled in this variant.
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data["size"]["width"])
    height = int(data["size"]["height"])
    image_id = get_image_id(data["filename"])
    if ann_json_dict:
        ann_json_dict["images"].append({
            "file_name": data["filename"],
            "height": height,
            "width": width,
            "id": image_id,
        })

    # Parallel per-object accumulators for the feature lists below.
    xmin, ymin, xmax, ymax = [], [], [], []
    area, classes, classes_text = [], [], []
    truncated, poses, difficult_obj = [], [], []

    for annotated_obj in data.get("object", []):
        difficult = bool(int(annotated_obj["difficult"]))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        box = annotated_obj["bndbox"]
        xmin.append(float(box["xmin"]) / width)
        ymin.append(float(box["ymin"]) / height)
        xmax.append(float(box["xmax"]) / width)
        ymax.append(float(box["ymax"]) / height)
        # Normalized area of the box just appended.
        area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
        classes_text.append(annotated_obj["name"].encode("utf8"))
        classes.append(label_map_dict[annotated_obj["name"]])
        truncated.append(int(annotated_obj["truncated"]))
        poses.append(annotated_obj["pose"].encode("utf8"))

        if ann_json_dict:
            # COCO annotations keep absolute pixel [x, y, w, h] boxes.
            left, top = int(box["xmin"]), int(box["ymin"])
            right, bottom = int(box["xmax"]), int(box["ymax"])
            box_w, box_h = right - left, bottom - top
            ann_json_dict["annotations"].append({
                "area": box_w * box_h,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [left, top, box_w, box_h],
                "category_id": label_map_dict[annotated_obj["name"]],
                "id": get_ann_id(),
                "ignore": 0,
                "segmentation": [],
            })

    feature = {
        "image/height": tfrecord_util.int64_feature(height),
        "image/width": tfrecord_util.int64_feature(width),
        "image/filename": tfrecord_util.bytes_feature(
            data["filename"].encode("utf8")),
        "image/source_id": tfrecord_util.bytes_feature(
            str(image_id).encode("utf8")),
        "image/key/sha256": tfrecord_util.bytes_feature(key.encode("utf8")),
        "image/encoded": tfrecord_util.bytes_feature(encoded_jpg),
        "image/format": tfrecord_util.bytes_feature("jpeg".encode("utf8")),
        "image/object/bbox/xmin": tfrecord_util.float_list_feature(xmin),
        "image/object/bbox/xmax": tfrecord_util.float_list_feature(xmax),
        "image/object/bbox/ymin": tfrecord_util.float_list_feature(ymin),
        "image/object/bbox/ymax": tfrecord_util.float_list_feature(ymax),
        "image/object/area": tfrecord_util.float_list_feature(area),
        "image/object/class/text": tfrecord_util.bytes_list_feature(
            classes_text),
        "image/object/class/label": tfrecord_util.int64_list_feature(classes),
        "image/object/difficult": tfrecord_util.int64_list_feature(
            difficult_obj),
        "image/object/truncated": tfrecord_util.int64_list_feature(truncated),
        "image/object/view": tfrecord_util.bytes_list_feature(poses),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Code example #6 (score: 0)
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ann_json_dict=None):
    """Convert one WIDER-face annotation entry into a tf.Example proto.

  Bounding-box pixel coordinates in data['bbox'] are normalized by the
  actual image width/height read from the decoded JPEG.

  Args:
    data: one entry from wider_annotations[SET]; must provide 'path' (image
      path relative to dataset_directory), 'bbox' (a list of
      [xmin, ymin, xmax, ymax] pixel boxes) and 'poses' (one pose value per
      box).
    dataset_directory: Path to root directory holding WIDER dataset.
    label_map_dict: A map from string label names to integers ids; only the
      'face' entry is used.
    ann_json_dict: optional COCO-style annotation json dictionary, appended
      to in place.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['path'] is not a valid JPEG
  """

    # os.path.join with a single argument was a no-op; use the path directly.
    img_path = data['path']
    full_path = os.path.join(dataset_directory, img_path)
    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(image.width)
    height = int(image.height)
    image_id = get_image_id()
    if ann_json_dict:
        image = {
            'file_name': data['path'],
            'height': height,
            'width': width,
            'id': image_id,
        }
        ann_json_dict['images'].append(image)

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    area = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    for i, bx in enumerate(data['bbox']):
        # WIDER annotations carry no difficult/truncated flags; record zeros.
        difficult = False

        difficult_obj.append(int(difficult))

        xmin.append(float(bx[0]) / width)
        ymin.append(float(bx[1]) / height)
        xmax.append(float(bx[2]) / width)
        ymax.append(float(bx[3]) / height)
        # Normalized area of the box just appended.
        area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
        classes_text.append('face'.encode('utf8'))
        classes.append(label_map_dict['face'])
        truncated.append(0)
        poses.append(str(data['poses'][i]).encode('utf8'))

        if ann_json_dict:
            # COCO annotations keep absolute pixel [x, y, w, h] boxes.
            abs_xmin = int(bx[0])
            abs_ymin = int(bx[1])
            abs_xmax = int(bx[2])
            abs_ymax = int(bx[3])
            abs_width = abs_xmax - abs_xmin
            abs_height = abs_ymax - abs_ymin
            ann = {
                'area': abs_width * abs_height,
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                'category_id': label_map_dict['face'],
                'id': get_ann_id(),
                'ignore': 0,
                'segmentation': [],
            }
            ann_json_dict['annotations'].append(ann)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            tfrecord_util.int64_feature(height),
            'image/width':
            tfrecord_util.int64_feature(width),
            'image/filename':
            tfrecord_util.bytes_feature(str(data['path']).encode('utf8')),
            'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
            'image/key/sha256':
            tfrecord_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            tfrecord_util.bytes_feature(encoded_jpg),
            'image/format':
            tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            tfrecord_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            tfrecord_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            tfrecord_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            tfrecord_util.float_list_feature(ymax),
            'image/object/area':
            tfrecord_util.float_list_feature(area),
            'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
            'image/object/difficult':
            tfrecord_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            tfrecord_util.int64_list_feature(truncated),
            'image/object/view':
            tfrecord_util.bytes_list_feature(poses),
        }))
    return example
Code example #7 (score: 0)
def dict_to_tf_example(data, image_path, label_map_dict):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running tfrecord_util.recursive_parse_xml_to_dict)
    image_path: full path of the image file to encode.
    label_map_dict: A map from string label names to integers ids.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by image_path is not a valid JPEG
  """
    # os.path.join with a single argument was a no-op; use the path directly.
    full_path = image_path
    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Dimensions come from the XML 'size' element, not the decoded image.
    width = int(data['size']['width'])
    height = int(data['size']['height'])
    image_id = get_image_id(data['filename'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    area = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'object' in data:
        for obj in data['object']:
            # The difficult flag is recorded but never used to skip objects
            # (this variant has no ignore_difficult_instances option).
            difficult = bool(int(obj['difficult']))

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            # Normalized area of the box just appended.
            area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            tfrecord_util.int64_feature(height),
            'image/width':
            tfrecord_util.int64_feature(width),
            'image/filename':
            tfrecord_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
            'image/key/sha256':
            tfrecord_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            tfrecord_util.bytes_feature(encoded_jpg),
            'image/format':
            tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            tfrecord_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            tfrecord_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            tfrecord_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            tfrecord_util.float_list_feature(ymax),
            'image/object/area':
            tfrecord_util.float_list_feature(area),
            'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
            'image/object/difficult':
            tfrecord_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            tfrecord_util.int64_list_feature(truncated),
            'image/object/view':
            tfrecord_util.bytes_list_feature(poses),
        }))
    return example
Code example #8 (score: 0)
def dict_to_tf_example(img_path: str,
                       base_dir: str,
                       bboxes: list,
                       label_map_dict,
                       ann_json_dict=None):
    """Convert one image plus its bounding-box strings to a tf.Example proto.

    Each entry of bboxes is a comma-separated string
    'xmin,ymin,xmax,ymax,label' with absolute pixel coordinates; this
    function normalizes the coordinates by the actual image size.

    Args:
      img_path: The path (or relative to base dir) of the image.
      base_dir: The directory where all data sets are stored.
      bboxes: A list of 'xmin,ymin,xmax,ymax,label' strings, one per
        bounding box in that image.
      label_map_dict: A map from string label names to integers ids.
      ann_json_dict: optional COCO-style annotation dict, appended in place.

    Returns:
      example: The converted tf.Example.
    """

    full_path = os.path.join(base_dir, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_img = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_img)
    image = PIL.Image.open(encoded_jpg_io)
    width, height = image.size
    key = hashlib.sha256(encoded_img).hexdigest()

    image_id = get_image_id(img_path)

    if ann_json_dict:
        image = {
            'file_name': img_path,
            'height': height,
            'width': width,
            'id': image_id,
        }
        ann_json_dict['images'].append(image)

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []

    for bb in bboxes:
        # 'xmin,ymin,xmax,ymax,label' -> [xmin, ymin, xmax, ymax, label]
        box = bb.split(',')
        xmin.append(float(box[0]) / width)
        ymin.append(float(box[1]) / height)
        xmax.append(float(box[2]) / width)
        ymax.append(float(box[3]) / height)
        classes_text.append(box[4].encode('utf8'))
        classes.append(label_map_dict[box[4]])

        if ann_json_dict:
            # COCO annotations keep absolute pixel [x, y, w, h] boxes.
            abs_xmin = int(box[0])
            abs_ymin = int(box[1])
            abs_xmax = int(box[2])
            abs_ymax = int(box[3])
            abs_width = abs_xmax - abs_xmin
            abs_height = abs_ymax - abs_ymin
            ann = {
                'area': abs_width * abs_height,
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                'category_id': label_map_dict[box[4]],
                'id': get_ann_id(),
                'ignore': 0,
                'segmentation': [],
            }
            ann_json_dict['annotations'].append(ann)

    # NOTE(review): unlike the sibling converters, no 'image/format' feature
    # is written here — confirm downstream readers do not require it before
    # adding one (the input encoding is not guaranteed to be JPEG).
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            tfrecord_util.int64_feature(height),
            'image/width':
            tfrecord_util.int64_feature(width),
            'image/filename':
            tfrecord_util.bytes_feature(img_path.encode('utf8')),
            'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
            'image/key/sha256':
            tfrecord_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            tfrecord_util.bytes_feature(encoded_img),
            'image/object/bbox/xmin':
            tfrecord_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            tfrecord_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            tfrecord_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            tfrecord_util.float_list_feature(ymax),
            'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            tfrecord_util.int64_list_feature(classes)
        }))
    return example
Code example #9 (score: 0)
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
        u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
        coordinates in the official COCO dataset are given as [x, y, width,
        height] tuples using absolute coordinates where x, y represent the
        top-left (0-indexed) corner.  This function converts to the format
        expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed by the
      'id' field of each category.  See the label_map_util.create_category_index
      function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: SHA256 hex digest of the encoded image bytes (handy for dedup).
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.compat.v1.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  # Opening the bytes validates that they form a parseable image header
  # (PIL raises otherwise); the decoded pixels themselves are never used.
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate boxes and boxes extending past the image border.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      # COCO stores masks as (possibly multi-part) RLE/polygons; decode to a
      # binary mask and re-encode as PNG bytes.
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        # Non-crowd polygons decode to one channel per part; merge them.
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          tfrecord_util.int64_feature(image_height),
      'image/width':
          tfrecord_util.int64_feature(image_width),
      'image/filename':
          tfrecord_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          tfrecord_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          tfrecord_util.bytes_feature(encoded_jpg),
      'image/format':
          tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          tfrecord_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          tfrecord_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          tfrecord_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          tfrecord_util.float_list_feature(ymax),
      'image/object/class/text':
          tfrecord_util.bytes_list_feature(category_names),
      # Fix: category_ids was collected above but never written; the other
      # converters in this file all emit 'image/object/class/label'.
      'image/object/class/label':
          tfrecord_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          tfrecord_util.int64_list_feature(is_crowd),
      'image/object/area':
          tfrecord_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        tfrecord_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
Code example #10
0
def dict_to_tf_example(data,
                       dataset_directory,
                       filepath,
                       camera_settings,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages',
                       visibility_thresh=0.1,
                       ann_json_dict=None):
    """Convert an annotation dict to a tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding the annotation for a single image; objects are read
        from data['objects'], each with 'bounding_box' ('top_left' /
        'bottom_right' stored as [y, x] pairs), 'class' and 'visibility'.
      dataset_directory: Path to root directory holding the dataset.
        NOTE(review): currently unused — the image path is derived solely
        from `filepath`.
      filepath: path to the annotation file; the sibling '<stem>.jpg' file is
        read as the image.
      camera_settings: dict providing the image size under
        camera_settings['captured_image_size']['width'/'height'].
      label_map_dict: A map from string label names to integer ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False). NOTE(review): currently unused — every
        object is recorded with difficult=0.
      image_subdirectory: String specifying subdirectory within the dataset
        directory holding the actual image data. NOTE(review): currently
        unused.
      visibility_thresh: threshold compared against each object's
        'visibility'; the per-object 'truncated' flag is set to 1 when
        visibility > visibility_thresh.
      ann_json_dict: COCO-style annotation json dictionary; when provided,
        image and annotation entries are appended to it.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by `filepath`'s stem is not a valid
        JPEG.
    """

    # Fix: use splitext instead of split('.')[0], which truncated at the
    # FIRST dot and broke for paths with a dot in a directory name.
    img_path = os.path.splitext(filepath)[0] + '.jpg'
    with tf.io.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(camera_settings['captured_image_size']['width'])
    height = int(camera_settings['captured_image_size']['height'])
    image_id = get_image_id(img_path)

    if ann_json_dict:
        image = {
            'file_name': img_path,
            'height': height,
            'width': width,
            'id': image_id,
        }
        ann_json_dict['images'].append(image)

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    area = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'objects' in data:
        for obj in data['objects']:

            difficult_obj.append(0)

            # Box corners are stored as [y, x]: index 1 is x, index 0 is y.
            xmin.append(float(obj['bounding_box']['top_left'][1]) / width)
            ymin.append(float(obj['bounding_box']['top_left'][0]) / height)
            xmax.append(float(obj['bounding_box']['bottom_right'][1]) / width)
            ymax.append(float(obj['bounding_box']['bottom_right'][0]) / height)
            # Normalized area (fraction of the image), not pixels.
            area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
            classes_text.append(obj['class'].encode('utf8'))
            classes.append(label_map_dict[obj['class']])
            visibility = obj['visibility']
            truncated.append(int(visibility > visibility_thresh))
            poses.append('Frontal'.encode('utf8'))

            if ann_json_dict:
                # COCO-style annotation uses absolute [x, y, w, h] pixels.
                abs_xmin = int(obj['bounding_box']['top_left'][1])
                abs_ymin = int(obj['bounding_box']['top_left'][0])
                abs_xmax = int(obj['bounding_box']['bottom_right'][1])
                abs_ymax = int(obj['bounding_box']['bottom_right'][0])
                abs_width = abs_xmax - abs_xmin
                abs_height = abs_ymax - abs_ymin
                ann = {
                    'area': abs_width * abs_height,
                    'iscrowd': 0,
                    'image_id': image_id,
                    'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                    'category_id': label_map_dict[obj['class']],
                    'id': get_ann_id(),
                    'ignore': 0,
                    'segmentation': [],
                }
                ann_json_dict['annotations'].append(ann)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/height':
                    tfrecord_util.int64_feature(height),
                'image/width':
                    tfrecord_util.int64_feature(width),
                'image/filename':
                    tfrecord_util.bytes_feature(img_path.encode('utf8')),
                'image/source_id':
                    tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
                'image/key/sha256':
                    tfrecord_util.bytes_feature(key.encode('utf8')),
                'image/encoded':
                    tfrecord_util.bytes_feature(encoded_jpg),
                'image/format':
                    tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/bbox/xmin':
                    tfrecord_util.float_list_feature(xmin),
                'image/object/bbox/xmax':
                    tfrecord_util.float_list_feature(xmax),
                'image/object/bbox/ymin':
                    tfrecord_util.float_list_feature(ymin),
                'image/object/bbox/ymax':
                    tfrecord_util.float_list_feature(ymax),
                'image/object/area':
                    tfrecord_util.float_list_feature(area),
                'image/object/class/text':
                    tfrecord_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                    tfrecord_util.int64_list_feature(classes),
                'image/object/difficult':
                    tfrecord_util.int64_list_feature(difficult_obj),
                'image/object/truncated':
                    tfrecord_util.int64_list_feature(truncated),
                'image/object/view':
                    tfrecord_util.bytes_list_feature(poses),
            }))
    return example