def create_mock_tfrecord():
    pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8),
                                'RGB')
    image_output_stream = StringIO.StringIO()
    pil_image.save(image_output_stream, format='png')
    encoded_image = image_output_stream.getvalue()

    feature_map = {
        'test_field':
        dataset_util.float_list_feature([1, 2, 3, 4]),
        standard_fields.TfExampleFields.image_encoded:
        dataset_util.bytes_feature(encoded_image),
    }

    tf_example = tf.train.Example(features=tf.train.Features(
        feature=feature_map))
    with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
        writer.write(tf_example.SerializeToString())
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    img_path = os.path.join(data['folder'], image_subdirectory,
                            data['filename'])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image.
    label_map: String to integer label map.
    encoded_image: The encoded image string

  Returns:
    The populated TF Example, if the label of at least one object is present in
    label_map. Otherwise, returns None.
  """

    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]

    image_id = annotations_data_frame.ImageID.iloc[0]

    feature_map = {
        standard_fields.TfExampleFields.object_bbox_ymin:
        dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_xmin:
        dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_ymax:
        dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_xmax:
        dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
        standard_fields.TfExampleFields.object_class_text:
        dataset_util.bytes_list_feature(
            filtered_data_frame.LabelName.as_matrix()),
        standard_fields.TfExampleFields.object_class_label:
        dataset_util.int64_list_feature(
            filtered_data_frame.LabelName.map(
                lambda x: label_map[x]).as_matrix()),
        standard_fields.TfExampleFields.filename:
        dataset_util.bytes_feature('{}.jpg'.format(image_id)),
        standard_fields.TfExampleFields.source_id:
        dataset_util.bytes_feature(image_id),
        standard_fields.TfExampleFields.image_encoded:
        dataset_util.bytes_feature(encoded_image),
    }

    if 'IsGroupOf' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_group_of] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsGroupOf.as_matrix().astype(int))
    if 'IsOccluded' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_occluded] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsOccluded.as_matrix().astype(int))
    if 'IsTruncated' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_truncated] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsTruncated.as_matrix().astype(
                            int))
    if 'IsDepiction' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_depiction] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsDepiction.as_matrix().astype(
                            int))

    return tf.train.Example(features=tf.train.Features(feature=feature_map))
Exemple #4
0
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_mask_png)
  mask = PIL.Image.open(encoded_png_io)
  if mask.format != 'PNG':
    raise ValueError('Mask format not PNG')

  mask_np = np.asarray(mask)
  nonbackground_indices_x = np.any(mask_np != 2, axis=0)
  nonbackground_indices_y = np.any(mask_np != 2, axis=1)
  nonzero_x_indices = np.where(nonbackground_indices_x)
  nonzero_y_indices = np.where(nonbackground_indices_y)

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  masks = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue
    difficult_obj.append(int(difficult))

    if faces_only:
      xmin = float(obj['bndbox']['xmin'])
      xmax = float(obj['bndbox']['xmax'])
      ymin = float(obj['bndbox']['ymin'])
      ymax = float(obj['bndbox']['ymax'])
    else:
      xmin = float(np.min(nonzero_x_indices))
      xmax = float(np.max(nonzero_x_indices))
      ymin = float(np.min(nonzero_y_indices))
      ymax = float(np.max(nonzero_y_indices))

    xmins.append(xmin / width)
    ymins.append(ymin / height)
    xmaxs.append(xmax / width)
    ymaxs.append(ymax / height)
    class_name = get_class_name_from_filename(data['filename'])
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))
    if not faces_only:
      mask_remapped = mask_np != 2
      masks.append(mask_remapped)
    mask_stack = np.stack(masks).astype(np.float32)
    masks_flattened = np.reshape(mask_stack, [-1])

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }
  if not faces_only:
    feature_dict['image/object/mask'] = (
        dataset_util.float_list_feature(masks_flattened.tolist()))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
Exemple #5
0
def prepare_example(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

  Args:
    image_path: The complete path to image.
    annotations: A dictionary representing the annotation of a single object
      that appears in the image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    image = np.asarray(image)

    key = hashlib.sha256(encoded_png).hexdigest()

    width = int(image.shape[1])
    height = int(image.shape[0])

    xmin_norm = annotations['2d_bbox_left'] / float(width)
    ymin_norm = annotations['2d_bbox_top'] / float(height)
    xmax_norm = annotations['2d_bbox_right'] / float(width)
    ymax_norm = annotations['2d_bbox_bottom'] / float(height)

    difficult_obj = [0] * len(xmin_norm)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_png),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin_norm),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax_norm),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin_norm),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax_norm),
            'image/object/class/text':
            dataset_util.bytes_list_feature(
                [x.encode('utf8') for x in annotations['type']]),
            'image/object/class/label':
            dataset_util.int64_list_feature(
                [label_map_dict[x] for x in annotations['type']]),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.float_list_feature(annotations['truncated']),
            'image/object/alpha':
            dataset_util.float_list_feature(annotations['alpha']),
            'image/object/3d_bbox/height':
            dataset_util.float_list_feature(annotations['3d_bbox_height']),
            'image/object/3d_bbox/width':
            dataset_util.float_list_feature(annotations['3d_bbox_width']),
            'image/object/3d_bbox/length':
            dataset_util.float_list_feature(annotations['3d_bbox_length']),
            'image/object/3d_bbox/x':
            dataset_util.float_list_feature(annotations['3d_bbox_x']),
            'image/object/3d_bbox/y':
            dataset_util.float_list_feature(annotations['3d_bbox_y']),
            'image/object/3d_bbox/z':
            dataset_util.float_list_feature(annotations['3d_bbox_z']),
            'image/object/3d_bbox/rot_y':
            dataset_util.float_list_feature(annotations['3d_bbox_rot_y']),
        }))

    return example