def dict_to_tf_example(data,
                       label_map_dict,
                       data_dir):
    """Build a tf.Example holding one JPEG image and its single 'water' mask.

    Args:
        data: Path of the mask file relative to `data_dir`, as a string. The
            matching image path is derived from it by replacing every
            occurrence of "mask" with "images".
        label_map_dict: Mapping from class name to integer label id; it is
            looked up with the key 'water'.
        data_dir: Directory that `data` (and the derived image path) is
            relative to.

    Returns:
        example: The populated tf.train.Example.

    Raises:
        ValueError: If the image is not a JPEG, or the mask file cannot be
            read by OpenCV.
    """
    img_path = os.path.join(data_dir, data.replace("mask", "images"))
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Take the dimensions from the decoded image header instead of assuming
    # a fixed 512x512 input, so the stored metadata always matches the bytes.
    width, height = image.size

    mask_path = os.path.join(data_dir, data)
    # Flag 0 loads the mask as a single-channel 8-bit image. cv2.imread
    # signals failure by returning None rather than raising.
    mask = cv2.imread(mask_path, 0)
    if mask is None:
        raise ValueError('Could not read mask image: {}'.format(mask_path))
    # Integer division keeps the array 8-bit (255 -> 1, anything lower -> 0),
    # which PIL can encode as a grayscale PNG.
    binary_mask = mask // 255
    output = io.BytesIO()
    PIL.Image.fromarray(binary_mask).save(output, format='PNG')
    encoded_mask_png_list = [output.getvalue()]

    class_name = 'water'
    classes_text = [class_name.encode('utf8')]
    classes = [label_map_dict[class_name]]

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data.encode('utf8')),
        # `data` is a plain string path, so it is used directly as source id.
        'image/source_id': dataset_util.bytes_feature(
            data.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }

    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
def create_tf_example(row):
    """Build a tf.train.Example describing one bounding box of one image.

    `row` is indexed with the keys 'filename', 'xmin', 'xmax', 'ymin', 'ymax'
    and 'class'. The image bytes are read from ``<cwd>/images/<filename>`` and
    the box coordinates are divided by the image width / height.
    """
    image_path = os.path.join(
        os.getcwd(), 'images', '{}'.format(row['filename']))
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        jpg_bytes = image_file.read()
    # PIL is only needed to learn the pixel dimensions of the encoded image.
    width, height = Image.open(io.BytesIO(jpg_bytes)).size

    name_bytes = row['filename'].encode('utf8')
    format_bytes = b'jpg'

    norm_left = row['xmin'] / width
    norm_right = row['xmax'] / width
    norm_top = row['ymin'] / height
    norm_bottom = row['ymax'] / height
    label_text = row['class'].encode('utf8')
    label_id = class_text_to_int(row['class'])

    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(jpg_bytes),
        'image/format': dataset_util.bytes_feature(format_bytes),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature([norm_left]),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature([norm_right]),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature([norm_top]),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature([norm_bottom]),
        'image/object/class/text':
            dataset_util.bytes_list_feature([label_text]),
        'image/object/class/label':
            dataset_util.int64_list_feature([label_id]),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
Ejemplo n.º 3
0
def create_tf_example(example, writer):
  """Serialize one pre-parsed example dict and write it to `writer`.

  `example` is read through the keys 'height', 'width', 'filename',
  'encoded_image_data', 'image_format', 'bbox', 'class_text' and 'class_idx'.
  Every entry of 'bbox' is indexed as [0]=x-min, [1]=y-min, [2]=x-max,
  [3]=y-max and is divided by the image width / height before being stored.
  Nothing is returned; the serialized proto is passed to `writer.write`.
  """
  img_height = example['height']
  img_width = example['width']
  name = example['filename']
  image_bytes = example['encoded_image_data']
  fmt = example['image_format']

  boxes = example['bbox']
  # One normalized value per box, in the same order as `boxes`.
  left_edges = [box[0]/float(img_width) for box in boxes]
  right_edges = [box[2]/float(img_width) for box in boxes]
  top_edges = [box[1]/float(img_height) for box in boxes]
  bottom_edges = [box[3]/float(img_height) for box in boxes]
  label_names = example['class_text']
  label_ids = example['class_idx']

  features = {
      'image/height': dataset_util.int64_feature(img_height),
      'image/width': dataset_util.int64_feature(img_width),
      'image/filename': dataset_util.bytes_feature(bytes(name, "utf-8")),
      'image/source_id': dataset_util.bytes_feature(bytes(name, "utf-8")),
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature(bytes(fmt, "utf-8")),
      'image/object/bbox/xmin': dataset_util.float_list_feature(left_edges),
      'image/object/bbox/xmax': dataset_util.float_list_feature(right_edges),
      'image/object/bbox/ymin': dataset_util.float_list_feature(top_edges),
      'image/object/bbox/ymax': dataset_util.float_list_feature(bottom_edges),
      'image/object/class/text': dataset_util.bytes_list_feature(
          [bytes(label, "utf-8") for label in label_names]),
      'image/object/class/label': dataset_util.int64_list_feature(label_ids),
  }
  record = tf.train.Example(features=tf.train.Features(feature=features))
  writer.write(record.SerializeToString())
Ejemplo n.º 4
0
  def testDecodePngInstanceMasks(self):
    """Two PNG-encoded masks decode to the stacked float32 source masks."""
    rgb_image = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(rgb_image)
    raw_masks = [
        np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
        for _ in range(2)
    ]
    png_masks = [
        self._EncodeImage(mask, encoding_type='png') for mask in raw_masks
    ]
    # The expectation is the source masks as float32 with the trailing
    # channel axis squeezed away, stacked along a new leading axis.
    expected_masks = np.stack(
        [np.squeeze(mask.astype(np.float32)) for mask in raw_masks])

    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/object/mask': dataset_util.bytes_list_feature(png_masks),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(decoded)

    self.assertAllEqual(
        expected_masks,
        outputs[fields.InputDataFields.groundtruth_instance_masks])
Ejemplo n.º 5
0
  def testDecodeEmptyPngInstanceMasks(self):
    """An empty PNG mask list decodes to a mask array of shape [0, 10, 10]."""
    rgb_image = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(rgb_image)
    no_masks = []
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/object/mask': dataset_util.bytes_list_feature(no_masks),
        'image/height': dataset_util.int64_feature(10),
        'image/width': dataset_util.int64_feature(10),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(decoded)
      mask_shape = outputs[
          fields.InputDataFields.groundtruth_instance_masks].shape
      self.assertAllEqual(mask_shape, [0, 10, 10])
Ejemplo n.º 6
0
  def testDecodeAdditionalChannels(self):
    """Two copies of an extra channel decode to it concatenated on axis 2."""
    rgb_image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(rgb_image)

    extra_channel = np.random.randint(256, size=(4, 5, 1)).astype(np.uint8)
    extra_channel_bytes = self._EncodeImage(extra_channel)
    # Round-trip through the decoder helper so the expectation reflects the
    # encoded bytes rather than the raw random array.
    extra_channel_decoded = self._DecodeImage(extra_channel_bytes)

    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/additional_channels/encoded':
            dataset_util.bytes_list_feature([extra_channel_bytes] * 2),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/source_id': dataset_util.bytes_feature('image_id'),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(num_additional_channels=2)
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(decoded)
      expected = np.concatenate([extra_channel_decoded] * 2, axis=2)
      self.assertAllEqual(
          expected,
          outputs[fields.InputDataFields.image_additional_channels])
Ejemplo n.º 7
0
 def testDecodeImageLabels(self):
   """Image-level classes decode from integer labels and from label text."""
   rgb_image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
   jpeg_bytes = self._EncodeImage(rgb_image)
   classes_key = fields.InputDataFields.groundtruth_image_classes

   # Case 1: integer labels stored under 'image/class/label'.
   id_features = {
       'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
       'image/format': dataset_util.bytes_feature('jpeg'),
       'image/class/label': dataset_util.int64_list_feature([1, 2]),
   }
   serialized = tf.train.Example(
       features=tf.train.Features(feature=id_features)).SerializeToString()
   decoder = tf_example_decoder.TfExampleDecoder()
   decoded = decoder.decode(tf.convert_to_tensor(serialized))
   with self.test_session() as sess:
     outputs = sess.run(decoded)
   self.assertTrue(classes_key in outputs)
   self.assertAllEqual(outputs[classes_key], np.array([1, 2]))

   # Case 2: class names stored under 'image/class/text', resolved through a
   # label map file handed to the decoder.
   text_features = {
       'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
       'image/format': dataset_util.bytes_feature('jpeg'),
       'image/class/text': dataset_util.bytes_list_feature(['dog', 'cat']),
   }
   serialized = tf.train.Example(
       features=tf.train.Features(feature=text_features)).SerializeToString()
   label_map_string = """
     item {
       id:3
       name:'cat'
     }
     item {
       id:1
       name:'dog'
     }
   """
   label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
   with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
     label_map_file.write(label_map_string)
   decoder = tf_example_decoder.TfExampleDecoder(
       label_map_proto_file=label_map_path)
   decoded = decoder.decode(tf.convert_to_tensor(serialized))
   with self.test_session() as sess:
     sess.run(tf.tables_initializer())
     outputs = sess.run(decoded)
   self.assertTrue(classes_key in outputs)
   self.assertAllEqual(outputs[classes_key], np.array([1, 3]))
def createTfExample(singleFileData, path):
    """Assemble the tf.train.Example for one image and all of its boxes.

    `singleFileData.filename` names the image inside `path`, and
    `singleFileData.object.iterrows()` yields one row per box with the keys
    'xmin', 'xmax', 'ymin', 'ymax' and 'class'. Box coordinates are divided by
    the image width / height before being stored.
    """
    image_path = os.path.join(path, '{}'.format(singleFileData.filename))
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        encoded_image = image_file.read()

    # Decode just enough of the image with PIL to learn its dimensions.
    width, height = Image.open(io.BytesIO(encoded_image)).size

    encoded_name = singleFileData.filename.encode('utf8')
    image_format = b'jpg'

    x_mins, x_maxs, y_mins, y_maxs = [], [], [], []
    class_names, class_ids = [], []
    for _, box in singleFileData.object.iterrows():
        x_mins.append(box['xmin'] / width)
        x_maxs.append(box['xmax'] / width)
        y_mins.append(box['ymin'] / height)
        y_maxs.append(box['ymax'] / height)
        class_names.append(box['class'].encode('utf8'))
        class_ids.append(classAsTextToClassAsInt(box['class']))

    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(x_mins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(x_maxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(y_mins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(y_maxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def create_tf_example(filename, writer):
    """Convert one annotation text file into a tf.Example and write it out.

    The first line of the annotation file holds the image path preceded by
    one character, which is dropped. Every following non-blank line is a
    comma-separated record whose first four fields are the box corners
    (x-min, y-min, x-max, y-max) and whose fifth field is the class name.
    The class index is the position of that name in the module-level
    `labels` sequence. The image is re-encoded as PNG before being stored.

    Args:
        filename: Path of the annotation file; also stored as the example's
            'image/filename' and 'image/source_id'.
        writer: Object with a `write(bytes)` method that receives the
            serialized example.

    Raises:
        ValueError: If a class name is not present in `labels`, or a box
            field is not numeric.
    """
    # Close the annotation file deterministically instead of leaking the
    # handle until garbage collection.
    with open(filename) as annotation_file:
        lines = annotation_file.readlines()
    image_filename = lines[0].strip()[1:]

    # The context manager releases the underlying image file once the pixel
    # data has been re-encoded and the dimensions have been read.
    with Image.open(image_filename) as im:
        arr = io.BytesIO()
        im.save(arr, format='PNG')
        height = im.height
        width = im.width
    encoded_image_data = arr.getvalue()
    image_format = 'png'

    classes_text = []
    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    for line in lines[1:]:
        line = line.strip()
        if not line:
            continue
        data = line.split(",")
        # Coordinates may be written as floats; they are truncated to ints.
        bbox = list(map(int, map(float, data[:4])))
        class_text = data[4].strip()
        class_idx = labels.index(class_text)
        classes_text.append(class_text)
        classes.append(class_idx)
        xmins.append(bbox[0]/float(width))
        xmaxs.append(bbox[2]/float(width))
        ymins.append(bbox[1]/float(height))
        ymaxs.append(bbox[3]/float(height))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature([bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
Ejemplo n.º 10
0
  def testDecodeObjectLabelWithText(self):
    """Class text resolved via the label map replaces the annotated ids."""
    rgb_image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(rgb_image)
    class_names = ['cat', 'dog']
    # The ids stored in the example are expected to be overridden by the ids
    # the label map assigns to the class names.
    stored_ids = [3, 4]
    label_map_ids = [1, 2]
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(stored_ids),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()
    label_map_string = """
      item {
        id:1
        name:'cat'
      }
      item {
        id:2
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
      label_map_file.write(label_map_string)

    decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    table_init = tf.tables_initializer()
    with self.test_session() as sess:
      sess.run(table_init)
      outputs = sess.run(decoded)

    self.assertAllEqual(
        label_map_ids, outputs[fields.InputDataFields.groundtruth_classes])
Ejemplo n.º 11
0
  def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self):
    """A display-name match yields its id; an unknown name yields -1."""
    rgb_image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(rgb_image)
    class_names = ['cat', 'cheetah']
    stored_ids = [5, 6]
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(stored_ids),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    # 'cat' only appears as a display_name here, and 'cheetah' not at all.
    label_map_string = """
      item {
        name:'/m/cat'
        id:3
        display_name:'cat'
      }
      item {
        name:'/m/dog'
        id:1
        display_name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
      label_map_file.write(label_map_string)
    decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    decoded = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      outputs = sess.run(decoded)

    self.assertAllEqual(
        [3, -1], outputs[fields.InputDataFields.groundtruth_classes])
def create_tf_example(group, path):
    """Build the tf.train.Example for one image and every box in `group`.

    `group.filename` names the image inside `path`; `group.object.iterrows()`
    yields one row per box with the keys 'xmin', 'xmax', 'ymin', 'ymax' and
    'class'. Coordinates are divided by the image width / height.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        jpg_bytes = image_file.read()
    # Only the dimensions are needed from the decoded image.
    width, height = Image.open(io.BytesIO(jpg_bytes)).size

    name_bytes = group.filename.encode('utf8')
    format_bytes = b'jpg'

    # Normalized coordinates, collected per bounding-box edge.
    edges = {'xmin': [], 'xmax': [], 'ymin': [], 'ymax': []}
    label_names = []
    label_ids = []
    for _, box in group.object.iterrows():
        edges['xmin'].append(box['xmin'] / width)
        edges['xmax'].append(box['xmax'] / width)
        edges['ymin'].append(box['ymin'] / height)
        edges['ymax'].append(box['ymax'] / height)
        label_names.append(box['class'].encode('utf8'))
        label_ids.append(class_text_to_int(box['class']))

    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(jpg_bytes),
        'image/format': dataset_util.bytes_feature(format_bytes),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(edges['xmin']),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(edges['xmax']),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(edges['ymin']),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(edges['ymax']),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
Ejemplo n.º 13
0
def dict_to_tf_example(data,
                       image_subdirectory):
  """Turn a parsed PASCAL-style annotation dict into a tf.Example proto.

  Bounding-box coordinates are divided by the width / height recorded in
  `data['size']`, and class names are mapped to ids with `class_text_to_int`.
  A progress line is printed for every object that is added.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    image_subdirectory: String specifying the directory that holds the image
      named by data['filename'].
  Returns:
    example: The converted tf.Example.
  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(image_path, 'rb') as image_file:
    jpg_bytes = image_file.read()
  decoded = PIL.Image.open(io.BytesIO(jpg_bytes))
  if decoded.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  digest = hashlib.sha256(jpg_bytes).hexdigest()

  size = data['size']
  width = int(size['width'])
  height = int(size['height'])

  left, top, right, bottom = [], [], [], []
  label_ids = []
  label_names = []
  # Images without any annotated object simply have no 'object' entry.
  annotated = data['object'] if 'object' in data else []
  for obj in annotated:
    box = obj['bndbox']
    x0 = float(box['xmin'])
    x1 = float(box['xmax'])
    y0 = float(box['ymin'])
    y1 = float(box['ymax'])

    left.append(x0 / width)
    top.append(y0 / height)
    right.append(x1 / width)
    bottom.append(y1 / height)
    name = obj['name']
    print("%s: Added class %s (%d)" % (data['filename'], name, class_text_to_int(name)))
    label_names.append(name.encode('utf8'))
    label_ids.append(class_text_to_int(name))

  name_bytes_feature_args = data['filename']
  features = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          name_bytes_feature_args.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          name_bytes_feature_args.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(jpg_bytes),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(left),
      'image/object/bbox/xmax': dataset_util.float_list_feature(right),
      'image/object/bbox/ymin': dataset_util.float_list_feature(top),
      'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
      'image/object/class/text': dataset_util.bytes_list_feature(label_names),
      'image/object/class/label': dataset_util.int64_list_feature(label_ids),
  }

  return tf.train.Example(features=tf.train.Features(feature=features))
Ejemplo n.º 14
0
def dict_to_tf_example(labels_image, label_data, params, label_map_dict,
                       image_dir, image_prefix):
    """Build a tf.Example with axis-aligned and inclined boxes for one frame.

    Three grayscale PNGs named ``<image_prefix>_decay_rate.png``,
    ``<image_prefix>_intensity.png`` and ``<image_prefix>_zmax.png`` are read
    from `image_dir`, stacked along the last axis, written to a fixed scratch
    file and read back through `_readImage`.

    Args:
        labels_image: Sequence of per-object entries; for each entry, index 0
            is used as the x coordinates and index 1 as the y coordinates.
        label_data: Sequence indexed in parallel with `labels_image`; each
            item is read through its `ry`, `w`, `l` and `type` attributes.
        params: Nested mapping; `params['batch_processor']` supplies 'width',
            'length' and 'resolution'.
        label_map_dict: Mapping from `type` name to the stored class label.
        image_dir: Directory containing the three input PNGs.
        image_prefix: Common file-name prefix; also stored as filename and
            source id.

    Returns:
        The populated tf.train.Example.
    """
    channels = [
        cv2.imread(
            os.path.join(image_dir, image_prefix + '_' + suffix + '.png'), 0)
        for suffix in ('decay_rate', 'intensity', 'zmax')
    ]
    img = np.stack(channels, axis=-1)
    # NOTE(review): hard-coded absolute scratch path; this only works on the
    # machine it was written for — confirm before reuse.
    img_output = '/home/fischer/U/kitti/record/hack/img/test.png'
    cv2.imwrite(img_output, img)
    encoded_png, key = _readImage(img_output)

    processor = params['batch_processor']
    width = int(processor['width'] / processor['resolution'])
    height = int(processor['length'] / processor['resolution'])
    resolution = processor['resolution']

    xmin, ymin, xmax, ymax = [], [], [], []
    x_c, y_c = [], []
    w, h = [], []
    angle, sin_angle, cos_angle = [], [], []
    classes = []
    classes_text = []

    for idx, label_img in enumerate(labels_image):
        # Integer extents of the object, still in pixel units.
        left = int(min(label_img[0]))
        top = int(min(label_img[1]))
        right = int(max(label_img[0]))
        bottom = int(max(label_img[1]))

        x_min = left / width
        y_min = top / height
        x_max = right / width
        y_max = bottom / height
        xmin.append(x_min)
        ymin.append(y_min)
        xmax.append(x_max)
        ymax.append(y_max)

        # Diagnostics only: report boxes touching or leaving the unit square.
        corners = (x_min, y_min, x_max, y_max)
        if any(value >= 1 for value in corners):
            print('Higher:', x_min, y_min, x_max, y_max)
        if any(value <= 0 for value in corners):
            print('Lower:', x_min, y_min, x_max, y_max)

        x_c.append((left + right) / (2 * width))
        y_c.append((top + bottom) / (2 * height))

        angle_rad = _flipAngle(label_data[idx].ry)
        angle.append(angle_rad)
        sin_angle.append(math.sin(2 * angle_rad))
        cos_angle.append(math.cos(2 * angle_rad))
        cos_a = math.cos(angle_rad)
        sin_a = math.sin(angle_rad)

        # Object width in pixels, rescaled with the direction-dependent
        # normalization factor.
        w_pixels = label_data[idx].w / resolution
        w.append(w_pixels * math.sqrt(cos_a * cos_a /
                                      (height * height) + sin_a * sin_a /
                                      (width * width)))

        # Same for the object length, with width and height swapped.
        l_pixels = label_data[idx].l / resolution
        h.append(l_pixels * math.sqrt(cos_a * cos_a /
                                      (width * width) + sin_a * sin_a /
                                      (height * height)))

        class_name = label_data[idx].type
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(image_prefix.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(image_prefix.encode('utf8')),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'boxes/inclined/x_c': dataset_util.float_list_feature(x_c),
        'boxes/inclined/y_c': dataset_util.float_list_feature(y_c),
        'boxes/inclined/w': dataset_util.float_list_feature(w),
        'boxes/inclined/h': dataset_util.float_list_feature(h),
        'boxes/inclined/angle': dataset_util.float_list_feature(angle),
        'boxes/inclined/sin_angle':
            dataset_util.float_list_feature(sin_angle),
        'boxes/inclined/cos_angle':
            dataset_util.float_list_feature(cos_angle),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
Ejemplo n.º 15
0
def xml_to_tf(path_input, path_output):
    """Convert every ``.xml`` annotation in a directory into one TFRecord file.

    Each XML file is parsed positionally: child 1 of the root is taken as the
    image file name, child 4 as the size element (width, height), and for
    every ``object`` element child 0 is the class name and child 4 the box
    (xmin, ymin, xmax, ymax). This presumably matches the labelImg / PASCAL
    VOC layout — verify against the annotation tool in use. Box coordinates
    are divided by the image width / height before being stored.

    Args:
        path_input: Directory containing the XML files and the images they
            reference. A trailing path separator is optional.
        path_output: Path of the TFRecord file to create.
    """
    writer = tf.compat.v1.python_io.TFRecordWriter(path_output)
    # try/finally guarantees the record file is flushed and closed even when
    # one annotation or image cannot be processed.
    try:
        for entry in os.listdir(path_input):
            if not entry.endswith(".xml"):
                continue
            # os.path.join works whether or not path_input ends with a
            # separator, matching how the image path is built below.
            root = ET.parse(os.path.join(path_input, entry)).getroot()

            filename = root[1].text
            width = int(root[4][0].text)
            height = int(root[4][1].text)

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []
            classes = []

            for member in root.findall('object'):
                class_name = member[0].text
                xmin = int(member[4][0].text)
                ymin = int(member[4][1].text)
                xmax = int(member[4][2].text)
                ymax = int(member[4][3].text)

                xmins.append(xmin/width)
                xmaxs.append(xmax/width)
                ymins.append(ymin/height)
                ymaxs.append(ymax/height)
                classes_text.append(class_name.encode('utf8'))
                classes.append(class_text_to_int(class_name))

            with tf.io.gfile.GFile(os.path.join(path_input, '{}'.format(filename)), 'rb') as fid:
                encoded_jpg = fid.read()
            tf_example = tf.train.Example(features=tf.train.Features(feature={
                'image/height': dataset_util.int64_feature(height),
                'image/width': dataset_util.int64_feature(width),
                'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
                'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
                'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                'image/format': dataset_util.bytes_feature(IMAGE_FORMAT),
                'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
                'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label': dataset_util.int64_list_feature(classes),
            }))

            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
    output_path = os.path.join(os.getcwd(), path_output)
    print('Successfully created the TFRecords: {}'.format(output_path))
Ejemplo n.º 16
0
def create_tf_example(group, path, class_dict):
    """Build a tf.train.Example for one image and all of its boxes.

    Args:
        group: Object with a ``filename`` attribute (image file name, joined
            onto ``path``) and an ``object`` attribute supporting
            ``iterrows()`` that yields one row per bounding box. Each row
            must carry a ``class`` entry plus either the relative columns
            ``xmin_rel``/``xmax_rel``/``ymin_rel``/``ymax_rel`` (stored
            unchanged) or the columns ``xmin``/``xmax``/``ymin``/``ymax``
            (divided by the image width/height read from the file).
        path: Directory that contains the image file.
        class_dict: Mapping from class name to integer label id.

    Returns:
        A tf.train.Example holding the encoded image and its annotations.

    Raises:
        ValueError: If a row provides neither set of box columns.
    """
    with tf.io.gfile.GFile(os.path.join(path, "{}".format(group.filename)),
                           "rb") as fid:
        encoded_jpg = fid.read()
    # Decode only to learn the pixel size used for normalisation.
    image = Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size

    filename = group.filename.encode("utf8")
    image_format = b"jpg"
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    relative_columns = ("xmin_rel", "xmax_rel", "ymin_rel", "ymax_rel")
    pixel_columns = ("xmin", "xmax", "ymin", "ymax")

    for _, row in group.object.iterrows():
        available = set(row.index)
        if available.issuperset(relative_columns):
            # Relative coordinates take precedence and are stored as given.
            xmin = row["xmin_rel"]
            xmax = row["xmax_rel"]
            ymin = row["ymin_rel"]
            ymax = row["ymax_rel"]
        elif available.issuperset(pixel_columns):
            xmin = row["xmin"] / width
            xmax = row["xmax"] / width
            ymin = row["ymin"] / height
            ymax = row["ymax"] / height
        else:
            # Without this branch the coordinates would be unbound and the
            # appends below would fail with an unhelpful NameError.
            raise ValueError(
                "Annotation for {!r} has neither relative columns {} nor "
                "pixel columns {}".format(group.filename, relative_columns,
                                          pixel_columns))

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)
        classes_text.append(row["class"].encode("utf8"))
        classes.append(class_dict[row["class"]])

    feature = {
        "image/height": dataset_util.int64_feature(height),
        "image/width": dataset_util.int64_feature(width),
        "image/filename": dataset_util.bytes_feature(filename),
        "image/source_id": dataset_util.bytes_feature(filename),
        "image/encoded": dataset_util.bytes_feature(encoded_jpg),
        "image/format": dataset_util.bytes_feature(image_format),
        "image/object/bbox/xmin": dataset_util.float_list_feature(xmins),
        "image/object/bbox/xmax": dataset_util.float_list_feature(xmaxs),
        "image/object/bbox/ymin": dataset_util.float_list_feature(ymins),
        "image/object/bbox/ymax": dataset_util.float_list_feature(ymaxs),
        "image/object/class/text":
        dataset_util.bytes_list_feature(classes_text),
        "image/object/class/label":
        dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(img_path,
                      json_path,
                      label_map_dict,
                      ignore_difficult_instances=False):
    """Convert a JSON polygon annotation and its JPEG into a tf.Example.

    The JSON file is expected to contain an ``imagePath`` entry and a
    ``shapes`` list whose items each have a ``label`` and a list of
    ``points`` ([x, y] pairs). For every shape the points are clamped into
    the image, the enclosing box is stored in normalised coordinates, and
    the polygon is rasterised into a PNG-encoded instance mask.

    Args:
        img_path: Path of the JPEG image.
        json_path: Path of the JSON annotation file.
        label_map_dict: Mapping from label name to integer class id.
        ignore_difficult_instances: Accepted for interface compatibility;
            every shape is treated as not difficult, so nothing is skipped.

    Returns:
        The populated tf.train.Example.

    Raises:
        ValueError: If the image is not a JPEG.
    """
    with tf.gfile.GFile(json_path, 'r') as fid:
        data = json.loads(fid.read())

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width, height = image.size

    xmin, ymin, xmax, ymax = [], [], [], []
    classes, classes_text = [], []
    truncated, poses, difficult_obj = [], [], []
    masks = []

    # More than one shape per image is only reported, never rejected.
    if len(data["shapes"]) > 1:
        print("problem img {}".format(data["imagePath"]))

    for shape in data['shapes']:
        difficult = False
        if ignore_difficult_instances and difficult:
            continue
        difficult_obj.append(int(difficult))

        points = shape["points"]

        # Clamp each coordinate in place to [0, limit - 1], reporting every
        # correction; x (axis 0) is handled before y (axis 1).
        for pt in points:
            for axis, limit in ((0, width), (1, height)):
                if pt[axis] >= limit:
                    print("problem {} and img size {} and img {}".format(
                        pt, image.size, data["imagePath"]))
                    pt[axis] = limit - 1
                if pt[axis] < 0:
                    print("problem {} and img size {} and img {}".format(
                        pt, image.size, data["imagePath"]))
                    pt[axis] = 0

        xs = [pt[0] for pt in points]
        ys = [pt[1] for pt in points]
        left, top = min(xs), min(ys)
        right, bottom = max(xs), max(ys)

        xmin.append(max(float(left) / width, 0))
        ymin.append(max(float(top) / height, 0))
        xmax.append(min(float(right) / width, 1.0))
        ymax.append(min(float(bottom) / height, 1.0))

        label = shape["label"]
        classes_text.append(label.encode('utf8'))
        classes.append(label_map_dict[label])
        truncated.append(int(0))
        poses.append("Unspecified".encode('utf8'))

        # Rasterise the polygon: 1 inside and on the outline, 0 elsewhere.
        canvas = Image.new('L', (width, height), 0)
        ImageDraw.Draw(canvas).polygon([tuple(pt) for pt in points],
                                       outline=1,
                                       fill=1)
        masks.append(numpy.array(canvas))

    image_path_bytes_args = data['imagePath']
    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(image_path_bytes_args.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(image_path_bytes_args.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }

    # PNG-encode every instance mask, in the same order as the boxes.
    encoded_mask_png_list = []
    for mask_array in masks:
        buffer = io.BytesIO()
        Image.fromarray(mask_array).save(buffer, format='PNG')
        encoded_mask_png_list.append(buffer.getvalue())
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))

    return tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data. The class of every object is derived from the image file
  name via get_class_name_from_filename, not from the per-object fields.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict). Reads 'filename',
      'size' ('width', 'height') and, when present, 'object'.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying the directory holding the actual
      image data; data['filename'] is joined onto it.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  # Normalisation uses the size recorded in the annotation, not the size of
  # the decoded image.
  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []

  # An annotation without any <object> element may produce a dict with no
  # 'object' key at all; treat that as "no boxes" instead of a KeyError.
  for obj in data.get('object', []):
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue
    difficult_obj.append(int(difficult))

    xmin = float(obj['bndbox']['xmin'])
    xmax = float(obj['bndbox']['xmax'])
    ymin = float(obj['bndbox']['ymin'])
    ymax = float(obj['bndbox']['ymax'])

    xmins.append(xmin / width)
    ymins.append(ymin / height)
    xmaxs.append(xmax / width)
    ymaxs.append(ymax / height)
    class_name = get_class_name_from_filename(data['filename'])
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
Ejemplo n.º 19
0
def txt_to_tf_example(txt, img_path, label_map_dict):
    """Convert a plain-text box annotation and its JPEG into a tf.Example.

    Every non-blank line of ``txt`` describes one object as whitespace
    separated fields ``<class_id> <x_center> <y_center> <box_width>
    <box_height>``. The box is converted to corner form as
    ``center -/+ size / 2`` and stored without further scaling, so the
    values are presumably already normalised to the image size
    (TODO confirm against the annotation tool).

    Args:
        txt: Full contents of the annotation file as a string.
        img_path: Path of the JPEG image; its base name is stored as the
            file name and source id.
        label_map_dict: A map from string label names to integer ids, used
            to look up the class text for each numeric class id.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image at ``img_path`` is not a JPEG, or a numeric
            field cannot be parsed.
        IndexError: if an annotation line has fewer than five fields.
    """
    _, filename = os.path.split(img_path)
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image {} format not JPEG'.format(img_path))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(image.size[0])
    height = int(image.size[1])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    for line in txt.split('\n'):
        # split() without a separator tolerates tabs, repeated spaces and a
        # trailing '\r' from CRLF files; whitespace-only lines yield no
        # fields and are skipped instead of crashing on a missing column.
        obj_data = line.split()
        if not obj_data:
            continue
        difficult_obj.append(0)

        class_id = int(obj_data[0])
        x_center = float(obj_data[1])
        y_center = float(obj_data[2])
        half_w = float(obj_data[3]) / 2
        half_h = float(obj_data[4]) / 2

        xmin.append(x_center - half_w)
        ymin.append(y_center - half_h)
        xmax.append(x_center + half_w)
        ymax.append(y_center + half_h)

        # NOTE(review): a class id absent from label_map_dict adds no text
        # entry, leaving classes_text shorter than classes; confirm the
        # label map covers every id used in the annotations.
        for name, idx in label_map_dict.items():
            if idx == class_id:
                classes_text.append(name.encode('utf8'))
        classes.append(class_id)
        truncated.append(0)
        poses.append('Frontal'.encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Ejemplo n.º 20
0
def dict_to_tf_example(filename, mask_path, label_map_dict, img_path):
    """Convert an image and its segmentation mask to a tf.Example proto.

    For every class in the module-level ``mask_pixel`` mapping (class name ->
    grayscale pixel value) that occurs in the mask, one object is emitted:
    the tight bounding box of the matching pixels (normalised by the image
    size) and a binary PNG mask of those pixels.

    Args:
        filename: Name of the image, stored as file name and source id.
        mask_path: String path to PNG encoded mask.
        label_map_dict: A map from string label names to integers ids.
        img_path: String path to the JPEG image.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image is not a JPEG or the mask is not a PNG.
    """
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    # Decode once; the array shape starts with (rows, columns).
    image_shape = np.asarray(image).shape
    width = image_shape[1]
    height = image_shape[0]
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_mask_png)
    mask = PIL.Image.open(encoded_png_io)
    mask_np = np.asarray(mask.convert('L'))
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    encoded_mask_png_list = []

    for class_name, pixel_value in mask_pixel.items():
        class_mask = mask_np == pixel_value
        # Columns / rows that contain at least one pixel of this class.
        x_indices = np.where(np.any(class_mask, axis=0))[0]
        y_indices = np.where(np.any(class_mask, axis=1))[0]
        if x_indices.size == 0 or y_indices.size == 0:
            continue

        xmin = float(np.min(x_indices))
        xmax = float(np.max(x_indices))
        ymin = float(np.min(y_indices))
        ymax = float(np.max(y_indices))
        print(filename, 'bounding box for', class_name, xmin, xmax, ymin,
              ymax)

        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)

        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

        # The mask carries no such metadata, so record neutral values; this
        # keeps every per-object list the same length as the box lists.
        difficult_obj.append(0)
        truncated.append(0)
        poses.append('Unspecified'.encode('utf8'))

        # Binary instance mask (1 where the class is present), PNG encoded.
        output = io.BytesIO()
        PIL.Image.fromarray(class_mask.astype(np.uint8)).save(output,
                                                              format='PNG')
        encoded_mask_png_list.append(output.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
        'image/object/mask':
        dataset_util.bytes_list_feature(encoded_mask_png_list),
    }

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
Ejemplo n.º 21
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False,
                      keypoint_annotations_dict=None,
                      densepose_annotations_dict=None,
                      remove_non_person_annotations=False,
                      remove_non_person_images=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']. Only 'height',
      'width', 'file_name' and 'id' are read here.
    annotations_list:
      list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
        u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
        coordinates in the official COCO dataset are given as [x, y, width,
        height] tuples using absolute coordinates where x, y represent the
        top-left (0-indexed) corner.  This function converts to the format
        expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed by the
      'id' field of each category.  See the label_map_util.create_category_index
      function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    keypoint_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
      keypoint information for this person object annotation. If None, then
      no keypoint annotations will be populated.
    densepose_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
      representing part surface coordinates. For more information see
      http://densepose.org/.
    remove_non_person_annotations: Whether to remove any annotations that are
      not the "person" class.
    remove_non_person_images: Whether to remove any images that do not contain
      at least one "person" annotation.

  Returns:
    key: SHA256 hash of the image.
    example: The converted tf.Example, or None when remove_non_person_images
      is set and no kept annotation is a "person".
    num_annotations_skipped: Number of (invalid) annotations that were ignored.
    num_keypoint_annotation_skipped: Number of keypoint annotations that were
      skipped.
    num_densepose_annotation_skipped: Number of DensePose annotations that were
      skipped.

  Raises:
    No exception is raised explicitly by this function; errors from reading
    or opening the image file propagate to the caller.
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    # NOTE: rebinds the `image` parameter; the opened image is not used again
    # in this function (sizes come from the metadata dict read above).
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Per-object accumulators; one entry per kept annotation.
    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    # NOTE: category_ids is filled below but never written to feature_dict.
    category_ids = []
    area = []
    encoded_mask_png = []
    # Keypoint accumulators; flattened across all kept annotations.
    keypoints_x = []
    keypoints_y = []
    keypoints_visibility = []
    keypoints_name = []
    num_keypoints = []
    include_keypoint = keypoint_annotations_dict is not None
    num_annotations_skipped = 0
    num_keypoint_annotation_used = 0
    num_keypoint_annotation_skipped = 0
    # DensePose accumulators; flattened across all kept annotations.
    dp_part_index = []
    dp_x = []
    dp_y = []
    dp_u = []
    dp_v = []
    dp_num_points = []
    densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
    include_densepose = densepose_annotations_dict is not None
    num_densepose_annotation_used = 0
    num_densepose_annotation_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Skip degenerate boxes.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        # Skip boxes that extend past the right or bottom image edge.
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        category_name = category_index[category_id]['name'].encode('utf8')
        if remove_non_person_annotations and category_name != b'person':
            num_annotations_skipped += 1
            continue
        # Convert [x, y, w, h] in pixels to normalized corner coordinates.
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(category_name)
        area.append(object_annotations['area'])

        if include_masks:
            # `mask` is defined outside this block - presumably
            # pycocotools.mask; confirm against the file's imports.
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            # For non-crowd objects the decoded mask is reduced over axis 2
            # (presumably one channel per polygon part - TODO confirm).
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

        if include_keypoint:
            annotation_id = object_annotations['id']
            if annotation_id in keypoint_annotations_dict:
                num_keypoint_annotation_used += 1
                keypoint_annotations = keypoint_annotations_dict[annotation_id]
                # 'keypoints' is a flat list read with stride 3:
                # x at offset 0, y at offset 1, visibility at offset 2.
                keypoints = keypoint_annotations['keypoints']
                num_kpts = keypoint_annotations['num_keypoints']
                keypoints_x_abs = keypoints[::3]
                keypoints_x.extend(
                    [float(x_abs) / image_width for x_abs in keypoints_x_abs])
                keypoints_y_abs = keypoints[1::3]
                keypoints_y.extend(
                    [float(y_abs) / image_height for y_abs in keypoints_y_abs])
                keypoints_visibility.extend(keypoints[2::3])
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(num_kpts)
            else:
                # No keypoints for this object: emit zero-valued placeholders,
                # one per name in _COCO_KEYPOINT_NAMES, so every kept object
                # still contributes a block of keypoint entries.
                keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(0)

        if include_densepose:
            annotation_id = object_annotations['id']
            # The `key` inside the generator expression below is local to the
            # generator and does not overwrite the SHA-256 `key` above.
            if (annotation_id in densepose_annotations_dict
                    and all(key in densepose_annotations_dict[annotation_id]
                            for key in densepose_keys)):
                dp_annotations = densepose_annotations_dict[annotation_id]
                num_densepose_annotation_used += 1
                dp_num_points.append(len(dp_annotations['dp_I']))
                dp_part_index.extend([
                    int(i - _DP_PART_ID_OFFSET) for i in dp_annotations['dp_I']
                ])
                # DensePose surface coordinates are defined on a [256, 256] grid
                # relative to each instance box (i.e. absolute coordinates in range
                # [0., 256.]). The following converts the coordinates
                # so that they are expressed in normalized image coordinates.
                dp_x_box_rel = [
                    clip_to_unit(val / 256.) for val in dp_annotations['dp_x']
                ]
                dp_x_norm = [(float(x) + x_box_rel * width) / image_width
                             for x_box_rel in dp_x_box_rel]
                dp_y_box_rel = [
                    clip_to_unit(val / 256.) for val in dp_annotations['dp_y']
                ]
                dp_y_norm = [(float(y) + y_box_rel * height) / image_height
                             for y_box_rel in dp_y_box_rel]
                dp_x.extend(dp_x_norm)
                dp_y.extend(dp_y_norm)
                dp_u.extend(dp_annotations['dp_U'])
                dp_v.extend(dp_annotations['dp_V'])
            else:
                # Object without a complete DensePose record: zero points.
                dp_num_points.append(0)

    # Optionally drop the whole image when no kept annotation is a person.
    # The keypoint/DensePose skipped counts are still their initial 0 here,
    # because they are only computed further down.
    if (remove_non_person_images
            and not any(name == b'person' for name in category_names)):
        return (key, None, num_annotations_skipped,
                num_keypoint_annotation_skipped,
                num_densepose_annotation_skipped)
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    if include_keypoint:
        feature_dict['image/object/keypoint/x'] = (
            dataset_util.float_list_feature(keypoints_x))
        feature_dict['image/object/keypoint/y'] = (
            dataset_util.float_list_feature(keypoints_y))
        feature_dict['image/object/keypoint/num'] = (
            dataset_util.int64_list_feature(num_keypoints))
        feature_dict['image/object/keypoint/visibility'] = (
            dataset_util.int64_list_feature(keypoints_visibility))
        feature_dict['image/object/keypoint/text'] = (
            dataset_util.bytes_list_feature(keypoints_name))
        # Skipped = entries in the supplied dict that were not matched by a
        # kept annotation of this image.
        num_keypoint_annotation_skipped = (len(keypoint_annotations_dict) -
                                           num_keypoint_annotation_used)
    if include_densepose:
        feature_dict['image/object/densepose/num'] = (
            dataset_util.int64_list_feature(dp_num_points))
        feature_dict['image/object/densepose/part_index'] = (
            dataset_util.int64_list_feature(dp_part_index))
        feature_dict['image/object/densepose/x'] = (
            dataset_util.float_list_feature(dp_x))
        feature_dict['image/object/densepose/y'] = (
            dataset_util.float_list_feature(dp_y))
        feature_dict['image/object/densepose/u'] = (
            dataset_util.float_list_feature(dp_u))
        feature_dict['image/object/densepose/v'] = (
            dataset_util.float_list_feature(dp_v))
        # Same accounting as for keypoints: dict entries not used above.
        num_densepose_annotation_skipped = (len(densepose_annotations_dict) -
                                            num_densepose_annotation_used)

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return (key, example, num_annotations_skipped,
            num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
Ejemplo n.º 22
0
    def process(self, image_id):
        """Build the tf.Example for a single image id.

        Args:
          image_id: key used to look up the image record and its annotations
            in this object's image and annotation dictionaries.

        Returns:
          A one-element list holding the tf.Example, or an empty list when
          the image file cannot be read or decoded.
        """
        image_info = self._image_dict[image_id]
        annotations = self._annotation_dict[image_id]
        image_height = image_info['height']
        image_width = image_info['width']
        filename = image_info['file_name']
        source_id = image_info['id']
        location_id = image_info['location']
        date_captured = str(image_info['date_captured'])
        sequence_id = str(image_info['seq_id'])
        sequence_length = int(image_info['seq_num_frames'])
        frame_number = int(image_info['frame_num'])

        image_path = os.path.join(self._image_directory, filename)
        try:
            # Read the bytes and let PIL open them: fails on missing or
            # unreadable files.
            with tf.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_jpg = fid.read()
            PIL.Image.open(io.BytesIO(encoded_jpg))
            # Decode once inside a throwaway graph so that images TensorFlow
            # cannot read are rejected here.
            with tf.Graph().as_default():
                decoded = tf.image.decode_jpeg(encoded_jpg, channels=3)
                init_op = tf.initialize_all_tables()
                with tf.Session() as sess:
                    sess.run(init_op)
                    sess.run(decoded)
        except Exception as e:  # pylint: disable=broad-except
            # Any failure above is logged and the image yields no example.
            tf.logging.error(str(e))
            return []

        key = hashlib.sha256(encoded_jpg).hexdigest()
        feature_dict = {
            'image/height':
            dataset_util.int64_feature(image_height),
            'image/width':
            dataset_util.int64_feature(image_width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(str(source_id).encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/location':
            dataset_util.bytes_feature(str(location_id).encode('utf8')),
            'image/seq_num_frames':
            dataset_util.int64_feature(sequence_length),
            'image/seq_frame_num':
            dataset_util.int64_feature(frame_number),
            'image/seq_id':
            dataset_util.bytes_feature(sequence_id.encode('utf8')),
            'image/date_captured':
            dataset_util.bytes_feature(date_captured.encode('utf8')),
        }

        # Image-level class defaults to the "empty" class; it is replaced by
        # the first kept annotation's category when there is one.
        image_label = 0
        image_text = 'empty'.encode('utf8')

        num_annotations_skipped = 0
        if annotations:
            xmin, xmax, ymin, ymax = [], [], [], []
            category_names, category_ids, area = [], [], []

            for annotation in annotations:
                if 'bbox' in annotation and self._keep_bboxes:
                    (x, y, box_w, box_h) = tuple(annotation['bbox'])
                    # Degenerate boxes and boxes reaching past the right or
                    # bottom image edge are dropped, category included.
                    if (box_w <= 0 or box_h <= 0
                            or x + box_w > image_width
                            or y + box_h > image_height):
                        num_annotations_skipped += 1
                        continue
                    xmin.append(float(x) / image_width)
                    xmax.append(float(x + box_w) / image_width)
                    ymin.append(float(y) / image_height)
                    ymax.append(float(y + box_h) / image_height)
                    # Fall back to half the box area when none is supplied.
                    area.append(annotation['area'] if 'area' in annotation
                                else box_w * box_h / 2.0)

                category_id = int(annotation['category_id'])
                category_ids.append(category_id)
                category_names.append(
                    self._category_dict[category_id]['name'].encode('utf8'))

            feature_dict.update({
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmin),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmax),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymin),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymax),
                'image/object/class/text':
                dataset_util.bytes_list_feature(category_names),
                'image/object/class/label':
                dataset_util.int64_list_feature(category_ids),
                'image/object/area':
                dataset_util.float_list_feature(area),
            })

            if category_ids:
                image_label = category_ids[0]
                image_text = category_names[0]

        feature_dict.update({
            'image/class/label':
            dataset_util.int64_list_feature([image_label]),
            'image/class/text':
            dataset_util.bytes_list_feature([image_text]),
        })

        example = tf.train.Example(features=tf.train.Features(
            feature=feature_dict))
        self._num_examples_processed.inc(1)

        return [example]
def dict_to_tf_example(data, image_subdirectory='JPEGImages',
                       dataset_directory='/home/lion/dataset'):
    """Convert a single-box annotation dict to a tf.Example proto.

    The bounding box coordinates are normalized by the image width and
    height given in `data`.

    Args:
        data: dict read with the keys 'file' (image path relative to
            `dataset_directory`), 'width', 'height', 'bbox' (a dict with
            'x1', 'x2', 'y1', 'y2'), 'category_class', 'category_name' and
            '_id'.
        image_subdirectory: Unused; kept so existing callers keep working.
        dataset_directory: Root directory that `data['file']` is joined to.
            Defaults to the path that was previously hard-coded.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image pointed to by data['file'] is not a JPEG.
    """
    full_path = os.path.join(dataset_directory, data['file'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['width'])
    height = int(data['height'])
    bbox = data['bbox']
    # Parse the category once so that integer and string ids are treated the
    # same way everywhere below.
    category = int(data['category_class'])

    # Convert before offsetting so string coordinates work for y as they
    # already do for x.
    y1 = float(bbox['y1'])
    y2 = float(bbox['y2'])
    if category == 2:  # exception about bottom: top edge moved down 8 px
        y1 += 8
    if category == 1:  # exception about top: bottom edge moved up 8 px
        y2 -= 8

    xmin = [float(bbox['x1']) / width]
    xmax = [float(bbox['x2']) / width]
    ymin = [y1 / height]
    ymax = [y2 / height]

    classes_text = [data['category_name'].encode('utf8')]
    # Source category 1 keeps label 1, source category 3 becomes label 2.
    # NOTE(review): any other category (including 2) still gets a box and a
    # class text but no numeric label -- confirm this is intended.
    classes = []
    if category == 1:
        classes.append(1)
    elif category == 3:
        classes.append(2)

    difficult = [0]
    truncated = [0]
    poses = ['Frontal'.encode('utf8')]

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['file'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(str(data['_id']).encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Ejemplo n.º 24
0
def dict_to_tf_example(annotation, dataset_directory, label_map_dict):
    """Convert a single-box annotation to a tf.Example holding a thumbnail.

    The source image is shrunk in place to fit within 500x500 pixels and
    written next to the original as '<name>_thumbnail.jpg'; the bytes of that
    thumbnail are what is stored in the example. The box is normalized with
    the ORIGINAL image size and clamped to [0, 1].

    Args:
        annotation: mapping with the keys 'relative_im_path', 'class',
            'bbox_x1', 'bbox_y1', 'bbox_x2' and 'bbox_y2'.
        dataset_directory: directory that 'relative_im_path' is joined to.
        label_map_dict: map from string label names to integer ids.

    Returns:
        example: The converted tf.Example.

    Raises:
        KeyError: if no label in `label_map_dict` maps to the annotation's
            class id (the lookup then falls back to the '' key).
    """
    im_path = str(annotation['relative_im_path'])
    cls = int(annotation['class'])
    x1 = int(annotation['bbox_x1'])
    y1 = int(annotation['bbox_y1'])
    x2 = int(annotation['bbox_x2'])
    y2 = int(annotation['bbox_y2'])

    full_img_path = os.path.join(dataset_directory, im_path)
    full_thumbnail_path = os.path.splitext(full_img_path)[0] + '_thumbnail.jpg'

    # Build the thumbnail; the context manager closes the source file handle.
    max_size = (500, 500)
    with PIL.Image.open(full_img_path) as big_image:
        orig_width, orig_height = big_image.size
        # LANCZOS is the filter formerly exposed as ANTIALIAS, which newer
        # Pillow releases no longer provide.
        big_image.thumbnail(max_size, PIL.Image.LANCZOS)
        big_image.save(full_thumbnail_path)

    with tf.gfile.GFile(full_thumbnail_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Width/height stored in the record describe the thumbnail, not the
    # original image.
    width, height = PIL.Image.open(io.BytesIO(encoded_jpg)).size

    # Normalize with the original size; float() keeps this a true division
    # on Python 2 as well.
    xmin = [max(0.0, float(x1) / orig_width)]
    xmax = [min(1.0, float(x2) / orig_width)]
    ymin = [max(0.0, float(y1) / orig_height)]
    ymax = [min(1.0, float(y2) / orig_height)]

    # Reverse lookup: first label name whose id equals the class id.
    label = ''
    for name, val in label_map_dict.items():
        if val == cls:
            label = name
            break

    classes_text = [label.encode('utf8')]
    classes = [label_map_dict[label]]

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(full_img_path.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(full_img_path.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return example
Ejemplo n.º 25
0
def create_tf_example(image, annotations_list, image_dir, category_index,
                      total_targets):
    """Convert one image and its target annotations to a tf.Example.

    Args:
        image: dict providing 'filename' and 'id'.
        annotations_list: iterable of dicts, each with a 'targets' list whose
            items provide 'center', 'ul' (upper-left corner), 'category' and
            'bbox_area'.
        image_dir: directory containing the image file.
        category_index: not used by this function.
        total_targets: running count of targets seen so far; every target
            visited here (kept or skipped) adds one.

    Returns:
        Tuple (key, example, num_annotations_skipped, total_targets) where
        key is the SHA-256 hex digest of the encoded image bytes.
    """
    filename = image['filename']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    # Image dimensions come from the decoded pixel array.
    pixels = np.asarray(Image.open(full_path))
    image_height = pixels.shape[0]
    image_width = pixels.shape[1]

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # The opened image itself is not used further.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin, xmax, ymin, ymax = [], [], [], []
    category_names, category_ids, area = [], [], []
    num_annotations_skipped = 0

    all_targets = (target
                   for object_annotations in annotations_list
                   for target in object_annotations['targets'])
    for target in all_targets:
        total_targets += 1
        center_x = target['center'][0]
        center_y = target['center'][1]
        left = target['ul'][0]
        top = target['ul'][1]

        # The box is symmetric about its center, so the full extent is twice
        # the distance from the upper-left corner to the center.
        box_w = (center_x - left) * 2
        box_h = (center_y - top) * 2

        if (box_w <= 0 or box_h <= 0
                or left + box_w > image_width
                or top + box_h > image_height):
            num_annotations_skipped += 1
            continue

        xmin.append(float(left) / image_width)
        xmax.append(float(left + box_w) / image_width)
        ymin.append(float(top) / image_height)
        ymax.append(float(top + box_h) / image_height)
        # category_ids is collected but not written to the example below.
        category_ids.append(int(dataset._category_lookup(target['category'])))
        category_names.append(target['category'].encode('utf8'))
        area.append(target['bbox_area'])

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    features = tf.train.Features(feature=feature_dict)
    example = tf.train.Example(features=features)
    return key, example, num_annotations_skipped, total_targets
def create_label_tfrecord(images_path):
	"""Collect tf.Example protos for every JPEG under images_path.

	Folders are walked recursively, skipping any subfolder named "nologo".
	In a folder without subfolders, the LAST line of its "metadata.txt" is
	read as comma-separated `label,x,y,width,height`; that single box is
	applied to every JPEG in the folder, normalized by the module-level
	IMAGE_WIDTH / IMAGE_HEIGHT. Each resulting example is appended to the
	module-level `tfrecord_examples` list; nothing is returned.

	Args:
		images_path: directory to process.
	"""
	# The parsed box, label and metadata path are kept in module globals.
	# NOTE(review): if a leaf folder has no metadata.txt, the values from a
	# previously processed folder are reused (or NameError on the first one).
	global xmin,ymin,xmax,ymax,label,filename

	(_, folders, files) = next(os.walk(images_path))
	if "nologo" in folders:
		folders.remove("nologo")

	if len(folders) != 0:
		for subfolder in folders:
			create_label_tfrecord(os.path.join(images_path, subfolder))
		return

	LABELS_FILE = "metadata.txt"
	labels_file_path = os.path.join(images_path, LABELS_FILE)
	for filename in glob.glob(labels_file_path):
		# Opened in binary mode, so label and the split fields are bytes.
		with open(filename, 'rb') as f:
			metaDataLine = list(f)[-1]
		metavalues = metaDataLine.split(b',')
		label = metavalues[0]
		xmin = int(metavalues[1])
		ymin = int(metavalues[2])
		# Fields 3 and 4 are width and height, converted to max corners.
		xmax = xmin + int(metavalues[3])
		ymax = ymin + int(metavalues[4])

	if LABELS_FILE in files:
		files.remove(LABELS_FILE)

	for image_name in files:
		image_path = os.path.join(images_path, image_name)
		if imghdr.what(image_path) != 'jpeg':
			print('its not a jpeg. ignoring image')
			continue

		# Context manager so the file handle is closed after reading.
		with tf.gfile.FastGFile(image_path, 'rb') as image_file:
			image_data = image_file.read()
		image_format = 'jpeg'
		height = IMAGE_HEIGHT
		width = IMAGE_WIDTH
		# One box per image: normalized corners, class text and class id.
		xmins = [float(xmin)/width]
		xmaxs = [float(xmax)/width]
		ymins = [float(ymin)/height]
		ymaxs = [float(ymax)/height]
		classes_text = [label]
		classes = [labelDict.get(label)]

		# NOTE(review): `filename` is the metadata.txt path from the loop
		# above, not the image's own name -- confirm this is intended.
		tf_example = tf.train.Example(features=tf.train.Features(feature={
			'image/height': dataset_util.int64_feature(height),
			'image/width': dataset_util.int64_feature(width),
			b'image/filename': dataset_util.bytes_feature(filename),
			b'image/source_id': dataset_util.bytes_feature(filename),
			b'image/encoded': dataset_util.bytes_feature(image_data),
			b'image/format': dataset_util.bytes_feature(image_format),
			'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
			'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
			'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
			'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
			'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
			'image/object/class/label': dataset_util.int64_list_feature(classes),
		}))

		tfrecord_examples.append(tf_example)
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Build a tf.Example from one COCO-style image record and its boxes.

  Args:
    image: dict providing at least 'height', 'width', 'file_name' and 'id'.
    annotations_list: list of dicts providing 'bbox', 'iscrowd',
      'category_id' and 'area' (plus 'segmentation' when include_masks is
      set). Each 'bbox' is read as (x, y, width, height) in absolute pixels
      and converted to corner coordinates normalized by the image size.
    image_dir: directory containing the image files.
    category_index: dict keyed by category id whose values provide 'name'.
    include_masks: whether to add PNG-encoded instance masks to the result.
      default: False.

  Returns:
    key: SHA-256 hex digest of the encoded image bytes.
    example: The converted tf.Example.
    num_annotations_skipped: Number of annotations dropped because the box
      was degenerate or extended past the right/bottom image edge.
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # The opened image itself is not used further.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin, xmax, ymin, ymax = [], [], [], []
    is_crowd, category_names, category_ids, area = [], [], [], []
    encoded_mask_png = []
    num_annotations_skipped = 0

    for annotation in annotations_list:
        (x, y, box_w, box_h) = tuple(annotation['bbox'])
        if (box_w <= 0 or box_h <= 0
                or x + box_w > image_width
                or y + box_h > image_height):
            num_annotations_skipped += 1
            continue

        xmin.append(float(x) / image_width)
        xmax.append(float(x + box_w) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + box_h) / image_height)
        is_crowd.append(annotation['iscrowd'])
        category_id = int(annotation['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(annotation['area'])

        if not include_masks:
            continue

        rle = mask.frPyObjects(annotation['segmentation'], image_height,
                               image_width)
        binary_mask = mask.decode(rle)
        # Non-crowd annotations are collapsed along the last axis.
        if not annotation['iscrowd']:
            binary_mask = np.amax(binary_mask, axis=2)
        png_buffer = io.BytesIO()
        PIL.Image.fromarray(binary_mask).save(png_buffer, format='PNG')
        encoded_mask_png.append(png_buffer.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))

    features = tf.train.Features(feature=feature_dict)
    example = tf.train.Example(features=features)
    return key, example, num_annotations_skipped
Ejemplo n.º 28
0
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
  """Build a tf.Example from an annotation dict and its segmentation mask.

  Box coordinates are normalized by the width/height in data['size'].

  Args:
    data: dict providing 'filename', 'size' ('width', 'height') and 'object'
      (a list of dicts with 'difficult', 'truncated', 'pose' and 'bndbox').
    mask_path: String path to the PNG encoded mask.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: Directory that data['filename'] is joined to.
    ignore_difficult_instances: Whether to skip objects flagged difficult
      (default: False).
    faces_only: If True, boxes come from each object's 'bndbox'. Otherwise
      the box is the extent of all mask pixels whose value is not 2, and the
      masks themselves are added to the example.
    mask_type: 'numerical' (flattened float list) or 'png' (one PNG per
      object). Only consulted when faces_only is False.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image is not a JPEG or the mask is not a PNG.
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  if PIL.Image.open(io.BytesIO(encoded_jpg)).format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  mask_image = PIL.Image.open(io.BytesIO(encoded_mask_png))
  if mask_image.format != 'PNG':
    raise ValueError('Mask format not PNG')

  # Every pixel whose mask value is not 2 counts as non-background.
  mask_np = np.asarray(mask_image)
  non_background = mask_np != 2
  nonzero_x_indices = np.where(np.any(non_background, axis=0))
  nonzero_y_indices = np.where(np.any(non_background, axis=1))

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins, ymins, xmaxs, ymaxs = [], [], [], []
  classes, classes_text = [], []
  truncated, poses, difficult_obj = [], [], []
  masks = []
  for obj in data['object']:
    is_difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and is_difficult:
      continue
    difficult_obj.append(int(is_difficult))

    if faces_only:
      box = obj['bndbox']
      xmin = float(box['xmin'])
      xmax = float(box['xmax'])
      ymin = float(box['ymin'])
      ymax = float(box['ymax'])
    else:
      # Tight box around the non-background region of the mask.
      xmin = float(np.min(nonzero_x_indices))
      xmax = float(np.max(nonzero_x_indices))
      ymin = float(np.min(nonzero_y_indices))
      ymax = float(np.max(nonzero_y_indices))

    xmins.append(xmin / width)
    ymins.append(ymin / height)
    xmaxs.append(xmax / width)
    ymaxs.append(ymax / height)

    # The class is derived from the image file name, not from the object.
    class_name = get_class_name_from_filename(data['filename'])
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))
    if not faces_only:
      masks.append((mask_np != 2).astype(np.uint8))

  filename_bytes_feature = dataset_util.bytes_feature(
      data['filename'].encode('utf8'))
  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': filename_bytes_feature,
      'image/source_id': filename_bytes_feature,
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }

  if not faces_only:
    if mask_type == 'numerical':
      flat_masks = np.reshape(np.stack(masks).astype(np.float32), [-1])
      feature_dict['image/object/mask'] = (
          dataset_util.float_list_feature(flat_masks.tolist()))
    elif mask_type == 'png':
      encoded_mask_png_list = []
      for mask_array in masks:
        png_buffer = io.BytesIO()
        PIL.Image.fromarray(mask_array).save(png_buffer, format='PNG')
        encoded_mask_png_list.append(png_buffer.getvalue())
      feature_dict['image/object/mask'] = (
          dataset_util.bytes_list_feature(encoded_mask_png_list))

  features = tf.train.Features(feature=feature_dict)
  return tf.train.Example(features=features)
Ejemplo n.º 29
0
def dict_to_tf_example(data, label_map_dict, img_path):
    """Build a tf.Example with one 'Panel' box per matching shape.

    Args:
        data: nested mapping; shapes are read from
            data['Layers']['Layer']['Shapes']['Shape']. Each shape provides a
            text node at ['BlockText']['Text'] and an extent node at
            ['Data']['Extent'] whose attributes hold 'X', 'Y', 'Width' and
            'Height'.
        label_map_dict: map from label name to integer id; must contain
            'Panel' whenever at least one shape matches.
        img_path: path of the JPEG image; also stored as filename/source id.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image at img_path is not a JPEG.
    """
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width, height = image.width, image.height

    xmins, ymins, xmaxs, ymaxs = [], [], [], []
    classes, classes_text = [], []

    for shape in data['Layers']['Layer']['Shapes']['Shape']:
        # Only shapes whose text starts with "Panel"/"panel" become boxes.
        text = shape['BlockText']['Text'].text
        if not text.startswith(('Panel', 'panel')):
            continue

        extent = shape['Data']['Extent'].attrib
        left = float(extent['X'])
        top = float(extent['Y'])
        box_w = float(extent['Width'])
        box_h = float(extent['Height'])

        # Corner coordinates normalized by the image size.
        xmin = left / width
        ymin = top / height
        xmax = (left + box_w) / width
        ymax = (top + box_h) / height

        # Boxes outside the image are reported (by image path) but kept.
        if xmin < 0 or ymin < 0 or xmax > 1.01 or ymax > 1.01:
            print(img_path)

        xmins.append(xmin)
        ymins.append(ymin)
        xmaxs.append(xmax)
        ymaxs.append(ymax)

        class_name = 'Panel'
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    path_feature = dataset_util.bytes_feature(img_path.encode('utf8'))
    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': path_feature,
        'image/source_id': path_feature,
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }

    features = tf.train.Features(feature=feature_dict)
    return tf.train.Example(features=features)
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict). The 'object' key
      may be absent, in which case the example has no boxes.
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
  full_path = os.path.join(dataset_directory, img_path)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  # An annotation without any object has no 'object' entry; treat it as an
  # empty list so the image is still written, with no boxes.
  for obj in data.get('object', []):
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue

    difficult_obj.append(int(difficult))

    bndbox = obj['bndbox']
    xmin.append(float(bndbox['xmin']) / width)
    ymin.append(float(bndbox['ymin']) / height)
    xmax.append(float(bndbox['xmax']) / width)
    ymax.append(float(bndbox['ymax']) / height)
    classes_text.append(obj['name'].encode('utf8'))
    classes.append(label_map_dict[obj['name']])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return example
      classes_text.append(str(row['class']).encode('utf8'))
      classes.append(class_dict[str(row['class'])])

   tf_example = tf.train.Example(features=tf.train.Features(
       feature={
           'image/height': dataset_util.int64_feature(height),
           'image/width': dataset_util.int64_feature(width),
           'image/filename': dataset_util.bytes_feature(filename),
           'image/source_id': dataset_util.bytes_feature(filename),
           'image/encoded': dataset_util.bytes_feature(encoded_jpg),
           'image/format': dataset_util.bytes_feature(image_format),
           'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
           'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
           'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
           'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
           'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
           'image/object/class/label': dataset_util.int64_list_feature(classes), }))
   return tf_example


def class_dict_from_pbtxt(pbtxt_path):
   # open file, strip \n, trim lines and keep only
   # lines beginning with id or display_name

   with open(pbtxt_path, 'r', encoding='utf-8-sig') as f:
      data = f.readlines()

   name_key = None
   if any('display_name:' in s for s in data):
      name_key = 'display_name:'
   elif any('name:' in s for s in data):
Ejemplo n.º 32
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      them to per-coordinate lists (xmin, xmax, ymin, ymax) normalized
      relative to the image size.
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  Each entry must provide a 'name'.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: SHA-256 hex digest of the encoded image bytes.
    example: The converted tf.Example.
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    No explicit JPEG check is performed here; errors raised while reading the
    file or by PIL.Image.open on the bytes propagate to the caller.
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  # The PIL handle itself is not used; the open call is kept so that this
  # function still fails here on bytes PIL cannot open, as it did before.
  PIL.Image.open(io.BytesIO(encoded_jpg))
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate boxes and boxes that extend past the image border.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        # Collapse the decoded mask along its last axis into a single 2-D mask.
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      # Class names were collected above but previously never written out.
      'image/object/class/text':
          dataset_util.bytes_list_feature(category_names),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def dict_to_tf_example(
    data, dataset_directory, label_map_dict, ignore_difficult_instances=False, image_subdirectory="./"
):
    """Build a tf.Example from one PASCAL-style annotation dict.

    Bounding box coordinates are divided by the image width/height taken from
    data["size"], so the stored boxes are normalized.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict).
      dataset_directory: Path to root directory holding PASCAL dataset.
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    relative_image_path = os.path.join(data["folder"], image_subdirectory, data["filename"])
    absolute_image_path = os.path.join(dataset_directory, relative_image_path)
    with tf.gfile.GFile(absolute_image_path, "rb") as fid:
        jpeg_bytes = fid.read()

    # Only the container format reported by PIL is inspected.
    if PIL.Image.open(io.BytesIO(jpeg_bytes)).format != "JPEG":
        raise ValueError("Image format not JPEG")
    sha256_hex = hashlib.sha256(jpeg_bytes).hexdigest()

    img_w = int(data["size"]["width"])
    img_h = int(data["size"]["height"])

    # Parallel per-object lists; index i in every list describes object i.
    lefts, tops, rights, bottoms = [], [], [], []
    label_ids, label_names = [], []
    truncated_flags, views, difficult_flags = [], [], []

    annotated_objects = data["object"] if "object" in data else []
    for obj in annotated_objects:
        is_difficult = bool(int(obj["difficult"]))
        if is_difficult and ignore_difficult_instances:
            continue

        difficult_flags.append(int(is_difficult))

        lefts.append(float(obj["bndbox"]["xmin"]) / img_w)
        tops.append(float(obj["bndbox"]["ymin"]) / img_h)
        rights.append(float(obj["bndbox"]["xmax"]) / img_w)
        bottoms.append(float(obj["bndbox"]["ymax"]) / img_h)
        label_names.append(obj["name"].encode("utf8"))
        label_ids.append(label_map_dict[obj["name"]])
        truncated_flags.append(int(obj["truncated"]))
        views.append(obj["pose"].encode("utf8"))

    filename_bytes = data["filename"].encode("utf8")
    features = {
        "image/height": dataset_util.int64_feature(img_h),
        "image/width": dataset_util.int64_feature(img_w),
        "image/filename": dataset_util.bytes_feature(filename_bytes),
        "image/source_id": dataset_util.bytes_feature(filename_bytes),
        "image/key/sha256": dataset_util.bytes_feature(sha256_hex.encode("utf8")),
        "image/encoded": dataset_util.bytes_feature(jpeg_bytes),
        "image/format": dataset_util.bytes_feature("jpeg".encode("utf8")),
        "image/object/bbox/xmin": dataset_util.float_list_feature(lefts),
        "image/object/bbox/xmax": dataset_util.float_list_feature(rights),
        "image/object/bbox/ymin": dataset_util.float_list_feature(tops),
        "image/object/bbox/ymax": dataset_util.float_list_feature(bottoms),
        "image/object/class/text": dataset_util.bytes_list_feature(label_names),
        "image/object/class/label": dataset_util.int64_list_feature(label_ids),
        "image/object/difficult": dataset_util.int64_list_feature(difficult_flags),
        "image/object/truncated": dataset_util.int64_list_feature(truncated_flags),
        "image/object/view": dataset_util.bytes_list_feature(views),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
Ejemplo n.º 34
0
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory):
    """Build a tf.Example from a JSON-derived annotation dict.

    Each box is read as (x, y, w, h) from box['x_y_w_h'] and stored as
    xmin/xmax/ymin/ymax divided by the image size from data['image_w_h'].

    Args:
        data: dict for a single image; this function reads the keys
            'filename', 'image_w_h' and 'objects' (each object providing
            'x_y_w_h' and 'label').
        label_map_dict: A map from string label names to integers ids.
        image_subdirectory: Directory joined with data['filename'] to locate
            the image file.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(image_path, 'rb') as fid:
        jpeg_bytes = fid.read()
    if PIL.Image.open(io.BytesIO(jpeg_bytes)).format != 'JPEG':
        raise ValueError('Image format not JPEG')

    img_w, img_h = data['image_w_h']

    # One entry per box in each list, all in the same order.
    norm_lefts = []
    norm_rights = []
    norm_tops = []
    norm_bottoms = []
    label_names = []
    label_ids = []

    for box in data['objects']:
        left = float(box['x_y_w_h'][0])
        top = float(box['x_y_w_h'][1])
        box_w = float(box['x_y_w_h'][2])
        box_h = float(box['x_y_w_h'][3])
        norm_lefts.append(float(left / img_w))
        norm_rights.append(float((left + box_w) / img_w))
        norm_tops.append(float(top / img_h))
        norm_bottoms.append(float((top + box_h) / img_h))
        label = box['label']
        label_names.append(label.encode('utf8'))
        label_ids.append(label_map_dict[label])

    filename_bytes = data['filename'].encode('utf8')
    features = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        'image/filename': dataset_util.bytes_feature(filename_bytes),
        'image/source_id': dataset_util.bytes_feature(filename_bytes),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(norm_lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(norm_rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(norm_tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(norm_bottoms),
        'image/object/class/text': dataset_util.bytes_list_feature(label_names),
        'image/object/class/label': dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
Ejemplo n.º 35
0
def dict_to_tf_example(data, image_dir, label_map_dict):
    """
    Convert an XML-derived annotation dict to a tf.Example proto.

    This function normalizes the bounding box coordinates provided by
    the raw data (divides them by the image width/height in data['size']).

    An image without an 'object' entry produces an example with empty
    object lists. An individual object that lacks a required field, or whose
    name is not in label_map_dict, is reported and skipped as a whole so the
    per-object feature lists always stay the same length.

    Arguments:
    :param data: dict holding XML fields for a single image (obtained by running
    dataset_util.recursive_parse_xml_to_dict)
    :param image_dir: Path to image directory
    :param label_map_dict: A map from string label names to integers ids.
    :return:
        example: The converted tf.Example.
    :raises ValueError: if the image file is not reported as JPEG by PIL.
    """

    full_path = os.path.join(image_dir, data['filename'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []

    if 'object' in data:
        objects = data['object']
    else:
        print(data['filename'] + ' without objects!')
        objects = []

    for obj in objects:
        # Resolve every field before appending anything: a KeyError halfway
        # through must not leave the bbox lists longer than the class lists.
        try:
            box = obj['bndbox']
            left = float(box['xmin']) / width
            top = float(box['ymin']) / height
            right = float(box['xmax']) / width
            bottom = float(box['ymax']) / height
            name = obj['name']
            label_id = label_map_dict[name]
        except KeyError as err:
            print('{}: skipping object with missing field or unknown label: {}'
                  .format(data['filename'], err))
            continue

        xmin.append(left)
        ymin.append(top)
        xmax.append(right)
        ymax.append(bottom)
        classes_text.append(name.encode('utf8'))
        classes.append(label_id)

    # No difficulty information is read here; every object is marked 0.
    difficult_obj = [0] * len(classes)
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj)
        }))

    return example
Ejemplo n.º 36
0
def dict_to_tf_example(data,
                       dataset_directory,
                       set_name,
                       label_map_dict,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). It is not modified.
      dataset_directory: Path to root directory holding PASCAL dataset
      set_name: name of the set training, validation or test; used as the
        subdirectory of dataset_directory that contains data['filename'].
      label_map_dict: A map from string label names to integers ids. Objects
        whose name is not a key of this map are skipped.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    img_path = os.path.join(set_name, data['filename'])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # Treat a missing 'object' entry as "no objects" without writing an empty
    # list back into the caller's dict.
    objects = data['object'] if 'object' in data else []
    for obj in objects:
        if obj['name'] not in label_map_dict:
            continue

        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Ejemplo n.º 37
0
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
  """Populates a TF Example message with image annotations from a data frame.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image. This function reads the columns ImageID, LabelName, XMin, XMax,
      YMin and YMax, plus IsGroupOf, IsOccluded, IsTruncated and IsDepiction
      when they are present.
    label_map: String to integer label map.
    encoded_image: The encoded image string

  Returns:
    The populated TF Example. Rows whose LabelName is not in label_map are
    filtered out before the object features are built.
    NOTE(review): an example is returned even when no rows survive the
    filter; callers that expect None in that case should confirm.
  """

  filtered_data_frame = annotations_data_frame[
      annotations_data_frame.LabelName.isin(label_map)]

  # The image id comes from the unfiltered frame, so it is available even
  # when every annotation row was filtered out.
  image_id = annotations_data_frame.ImageID.iloc[0]

  # `.values` replaces `.as_matrix()`, which was removed in pandas 1.0.
  feature_map = {
      standard_fields.TfExampleFields.object_bbox_ymin:
          dataset_util.float_list_feature(filtered_data_frame.YMin.values),
      standard_fields.TfExampleFields.object_bbox_xmin:
          dataset_util.float_list_feature(filtered_data_frame.XMin.values),
      standard_fields.TfExampleFields.object_bbox_ymax:
          dataset_util.float_list_feature(filtered_data_frame.YMax.values),
      standard_fields.TfExampleFields.object_bbox_xmax:
          dataset_util.float_list_feature(filtered_data_frame.XMax.values),
      standard_fields.TfExampleFields.object_class_text:
          dataset_util.bytes_list_feature(
              filtered_data_frame.LabelName.values),
      standard_fields.TfExampleFields.object_class_label:
          dataset_util.int64_list_feature(
              filtered_data_frame.LabelName.map(lambda x: label_map[x])
              .values),
      standard_fields.TfExampleFields.filename:
          dataset_util.bytes_feature('{}.jpg'.format(image_id)),
      standard_fields.TfExampleFields.source_id:
          dataset_util.bytes_feature(image_id),
      standard_fields.TfExampleFields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  # Optional per-object flags are written only when the column exists.
  if 'IsGroupOf' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_group_of] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsGroupOf.values.astype(int))
  if 'IsOccluded' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_occluded] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsOccluded.values.astype(int))
  if 'IsTruncated' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_truncated] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsTruncated.values.astype(int))
  if 'IsDepiction' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_depiction] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsDepiction.values.astype(int))

  return tf.train.Example(features=tf.train.Features(feature=feature_map))
Ejemplo n.º 38
0
def dict_to_tf_example(example):
    """Build a tf.Example from an annotation dict for a single image.

    Reads the image at example['filename'], takes width/height from the
    decoded image, and stores each annotation box divided by that size.

    Args:
        example: dict with a 'filename' path and an 'annotations' list whose
            items provide 'xmin', 'ymin', 'x_width', 'y_height' and 'class'.

    Returns:
        The populated tf.train.Example.

    Raises:
        ValueError: if PIL does not report the image format as JPEG.
    """
    image_path = example['filename']
    filename = image_path.encode()

    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        jpeg_bytes = fid.read()

    image = PIL.Image.open(io.BytesIO(jpeg_bytes))
    width, height = image.size

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    sha256_hex = hashlib.sha256(jpeg_bytes).hexdigest()

    lefts = []  # left x-coordinate / width
    tops = []  # top y-coordinate / height
    rights = []  # right x-coordinate / width
    bottoms = []  # bottom y-coordinate / height
    label_ids = []  # class id
    label_names = []  # class name

    for box in example['annotations']:
        lefts.append(float(box['xmin'] / width))
        rights.append(float((box['xmin'] + box['x_width']) / width))
        tops.append(float(box['ymin'] / height))
        bottoms.append(float((box['ymin'] + box['y_height']) / height))
        label_names.append(box['class'].encode())
        label_ids.append(int(LABELS_MAP[box['class']]))

    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/key/sha256': dataset_util.bytes_feature(sha256_hex.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text': dataset_util.bytes_list_feature(label_names),
        'image/object/class/label': dataset_util.int64_list_feature(label_ids),
    }

    return tf.train.Example(features=tf.train.Features(feature=features))
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
    """Build a tf.Example (optionally with instance masks) from a PASCAL-style dict.

    Bounding box coordinates are divided by the image width/height given in
    data['size'], so the stored boxes are normalized.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      mask_path: String path to PNG encoded mask.
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: Directory joined with data['filename'] to locate the
        image file.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      faces_only: If True, boxes come from each object's 'bndbox' and no mask
        feature is written.  Otherwise the box is the extent of all mask
        pixels whose value is not 2, and that region is stored as the mask.
      mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
        smaller file sizes.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image is not reported as JPEG, or the mask is not
        reported as PNG.
    """
    image_file = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(image_file, 'rb') as fid:
        jpeg_bytes = fid.read()
    if PIL.Image.open(io.BytesIO(jpeg_bytes)).format != 'JPEG':
        raise ValueError('Image format not JPEG')
    sha256_hex = hashlib.sha256(jpeg_bytes).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        mask_file_bytes = fid.read()
    mask_image = PIL.Image.open(io.BytesIO(mask_file_bytes))
    if mask_image.format != 'PNG':
        raise ValueError('Mask format not PNG')

    # Pixels whose value is not 2 are the ones kept for boxes and masks.
    mask_np = np.asarray(mask_image)
    foreground_cols = np.where(np.any(mask_np != 2, axis=0))
    foreground_rows = np.where(np.any(mask_np != 2, axis=1))

    img_w = int(data['size']['width'])
    img_h = int(data['size']['height'])

    # Parallel per-object lists; index i in every list describes object i.
    norm_lefts, norm_tops, norm_rights, norm_bottoms = [], [], [], []
    label_ids, label_names = [], []
    truncated_flags, views, difficult_flags = [], [], []
    instance_masks = []

    annotated_objects = data['object'] if 'object' in data else []
    for obj in annotated_objects:
        is_difficult = bool(int(obj['difficult']))
        if is_difficult and ignore_difficult_instances:
            continue
        difficult_flags.append(int(is_difficult))

        if not faces_only:
            # Box spans the full extent of the non-background mask pixels.
            left = float(np.min(foreground_cols))
            right = float(np.max(foreground_cols))
            top = float(np.min(foreground_rows))
            bottom = float(np.max(foreground_rows))
        else:
            left = float(obj['bndbox']['xmin'])
            right = float(obj['bndbox']['xmax'])
            top = float(obj['bndbox']['ymin'])
            bottom = float(obj['bndbox']['ymax'])

        norm_lefts.append(left / img_w)
        norm_tops.append(top / img_h)
        norm_rights.append(right / img_w)
        norm_bottoms.append(bottom / img_h)

        # The class is derived from the file name, not from obj['name'].
        label = get_class_name_from_filename(data['filename'])
        label_names.append(label.encode('utf8'))
        label_ids.append(label_map_dict[label])
        truncated_flags.append(int(obj['truncated']))
        views.append(obj['pose'].encode('utf8'))
        if not faces_only:
            instance_masks.append((mask_np != 2).astype(np.uint8))

    filename_bytes = data['filename'].encode('utf8')
    feature_dict = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        'image/filename': dataset_util.bytes_feature(filename_bytes),
        'image/source_id': dataset_util.bytes_feature(filename_bytes),
        'image/key/sha256': dataset_util.bytes_feature(sha256_hex.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(norm_lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(norm_rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(norm_tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(norm_bottoms),
        'image/object/class/text': dataset_util.bytes_list_feature(label_names),
        'image/object/class/label': dataset_util.int64_list_feature(label_ids),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_flags),
        'image/object/truncated': dataset_util.int64_list_feature(truncated_flags),
        'image/object/view': dataset_util.bytes_list_feature(views),
    }

    if not faces_only and mask_type == 'numerical':
        # All masks flattened into one float list.
        stacked = np.stack(instance_masks).astype(np.float32)
        flattened = np.reshape(stacked, [-1])
        feature_dict['image/object/mask'] = (
            dataset_util.float_list_feature(flattened.tolist()))
    elif not faces_only and mask_type == 'png':
        # One PNG-encoded byte string per mask.
        png_blobs = []
        for instance_mask in instance_masks:
            buffer = io.BytesIO()
            PIL.Image.fromarray(instance_mask).save(buffer, format='PNG')
            png_blobs.append(buffer.getvalue())
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(png_blobs))

    return tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
Ejemplo n.º 40
0
    def create_tf_example(self, number_of_dice: int, creation_seed: int):
        """Creates a labelled TFExample with the given number of dice at the corresponding creation seed.

        Dice boxes and dot boxes are written together as one list of objects:
        dice first, then all dots. Boxes are read as (x, y, width, height) and
        stored divided by self.image_width / self.image_height.

        Args:
            number_of_dice: Number of dice passed to
                self.get_dice_boxes_indices_and_dot_boxes.
            creation_seed: Seed passed to the same helper; also used in the
                generated image name.

        Returns:
            A tf.train.Example holding the PNG-encoded resized image, the
            boxes, class text/ids, y rotations and camera parameters.
        """
        image, dice_bounding_boxes, dice_top_face_indices, dice_y_rotations, dot_boxes_list, camera_matrix = self.get_dice_boxes_indices_and_dot_boxes(
            number_of_dice, creation_seed)
        camera_intrinsic = camera_matrix.ravel()
        camera_distortion = np.zeros(5)

        # "Zero" rotation is a special label for 'no rotation data defined',
        # so a real zero rotation is nudged to a tiny non-zero value. Compare
        # by value: `is 0` misses 0.0 and is an identity test on a literal.
        dice_y_rotations = [
            0.000001 if dice_rot == 0 else dice_rot
            for dice_rot in dice_y_rotations
        ]

        all_dot_boxes = [
            box for dot_boxes_per_dice in dot_boxes_list
            for box in dot_boxes_per_dice
        ]
        # Dots all share class 0 and carry the 'no rotation data' label 0.
        all_dot_classes = [0] * len(all_dot_boxes)
        all_dot_y_rotations = [0] * len(all_dot_boxes)

        all_y_rotations = dice_y_rotations + all_dot_y_rotations

        all_boxes = dice_bounding_boxes + all_dot_boxes
        xmins = [box[0] / self.image_width for box in all_boxes]
        ymins = [box[1] / self.image_height for box in all_boxes]
        xmaxs = [(box[0] + box[2]) / self.image_width for box in all_boxes]
        ymaxs = [(box[1] + box[3]) / self.image_height for box in all_boxes]

        all_classes_unshifted = dice_top_face_indices + all_dot_classes
        all_class_strings = [
            str.encode(self.class_string_dictionary[class_id])
            for class_id in all_classes_unshifted
        ]
        # Since label maps for TF object detection start at 1, it seems
        all_class_ids = [
            class_id + 1 for class_id in all_classes_unshifted
        ]

        # resize_with_pad takes (image, target_height, target_width); pass the
        # sizes by keyword so height and width cannot be swapped.
        image_resized = tf.image.resize_with_pad(
            image,
            target_height=self.target_image_height,
            target_width=self.target_image_width,
            method=tf.image.ResizeMethod.BILINEAR,
            antialias=False)
        image_resized_normalized = image_resized / 255.0
        image_resized_converted = tf.image.convert_image_dtype(
            image_resized_normalized, dtype=tf.uint16, saturate=False)
        encoded_image_data = tf.image.encode_png(
            image_resized_converted.numpy())
        encoded_image_data_np = encoded_image_data.numpy()
        image_format = b'png'
        image_name = str.encode("image_{}_{}".format(number_of_dice,
                                                     creation_seed))
        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(self.image_height),
                'image/width':
                dataset_util.int64_feature(self.image_width),
                'image/filename':
                dataset_util.bytes_feature(image_name),
                'image/source_id':
                dataset_util.bytes_feature(image_name),
                'image/encoded':
                dataset_util.bytes_feature(encoded_image_data_np),
                'image/format':
                dataset_util.bytes_feature(image_format),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(all_class_strings),
                'image/object/class/label':
                dataset_util.int64_list_feature(all_class_ids),
                'image/object/rotation/y_angle':
                dataset_util.float_list_feature(all_y_rotations),
                'image/camera/intrinsic':
                dataset_util.float_list_feature(camera_intrinsic),
                'image/camera/distortion':
                dataset_util.float_list_feature(camera_distortion),
            }))
        return tf_example
def create_one_tf_example(data, class_map, path='', channels=4):
    """Create a tf.Example proto from one multi-file image description.

    Every file listed in ``data['filepath']`` is read with ``cv2.imread`` and
    averaged over its last axis into a single plane; the planes are then
    stacked along a new last axis in list order and cast to ``uint8``.

    Args:
        data: dict with the keys ``'height'``, ``'width'``, ``'filepath'``
            (a sequence of image paths, one per output plane), ``'format'``
            (``'png'`` or ``'jpg'``) and ``'bboxes'`` (an iterable of dicts
            holding ``'xmin'``, ``'xmax'``, ``'ymin'``, ``'ymax'`` and
            ``'class'``).
        class_map: mapping from a box's ``'class'`` value to its integer id.
        path: not used by this function; kept for existing callers.
        channels: value stored under ``'image/channels'``. It is not checked
            against the number of files actually stacked.

    Returns:
        The created tf.train.Example, or None when one of the files cannot
        be read.

    Raises:
        ValueError: if ``data['format']`` is neither ``'png'`` nor ``'jpg'``,
            or if ``data['filepath']`` is empty.
    """
    height = data['height']
    width = data['width']
    filenames = data['filepath']
    format_img = data['format']

    # Validate before doing any I/O. Previously an unknown format only
    # printed a message and the function crashed later on an unbound name.
    if format_img == 'png':
        image_format = b'png'
    elif format_img == 'jpg':
        image_format = b'jpg'
    else:
        raise ValueError(
            "Unsupported image format {!r}; expected 'png' or 'jpg'".format(
                format_img))

    if len(filenames) == 0:
        raise ValueError("data['filepath'] lists no image files")

    # Each file contributes exactly one plane of the final image.
    planes = []
    for image_file in filenames:
        img_temp = cv2.imread(image_file)
        if img_temp is None:
            # The load result itself must be tested: np.mean never returns
            # None, so testing its output could not detect a failed read.
            print('error loading img')
            print("can't open image, check path parameters ! ")
            return None
        planes.append(np.mean(img_temp, axis=-1))

    img = np.stack(planes, axis=-1).astype(np.uint8)
    # NOTE(review): this is the raw uint8 buffer, not PNG/JPEG-compressed
    # data, although 'image/format' below says png/jpg - confirm the reader
    # decodes it accordingly.
    img_encoded = img.tobytes()

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    # Box corners are divided by the declared image size.
    for box in data['bboxes']:
        xmins.append(box['xmin'] / float(width))
        xmaxs.append(box['xmax'] / float(width))
        ymins.append(box['ymin'] / float(height))
        ymaxs.append(box['ymax'] / float(height))

        classes_text.append(box['class'].encode('utf8'))
        classes.append(class_map[box['class']])

    feat = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/channels': dataset_util.int64_feature(channels),
        'image/encoded': dataset_util.bytes_feature(img_encoded),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes)
    }
    tf_example = tf.train.Example(features=tf.train.Features(feature=feat))
    return tf_example
Ejemplo n.º 42
0
def bbox_to_tf_example(bbox, label, image_file,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
  """Build a tf.Example for one JPEG image and its bounding boxes.

  For each box b the stored corners are xmin = b[0], ymin = b[1],
  xmax = b[0] + b[2] and ymax = b[1] + b[3], each divided by the image
  width or height and capped at 1.0 (no lower cap is applied).

  Args:
    bbox: iterable of boxes, each indexable as b[0]..b[3].
    label: iterable of labels paired with `bbox` through zip; each one is
      turned into a class name by get_class_name_from_label.
    image_file: image path relative to `image_subdirectory`; also stored as
      the example's filename and source id.
    label_map_dict: A map from string class names to integer ids.
    image_subdirectory: directory joined in front of `image_file`.
    ignore_difficult_instances: not used by this function.
  Returns:
    example: The converted tf.Example.
  Raises:
    ValueError: if PIL does not report the image as JPEG.
  """
  with tf.gfile.GFile(os.path.join(image_subdirectory, image_file),
                      'rb') as fid:
    jpg_bytes = fid.read()
  image = PIL.Image.open(io.BytesIO(jpg_bytes))
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  digest = hashlib.sha256(jpg_bytes).hexdigest()

  width, height = image.size

  def _capped(pixels, extent):
    # Fraction of the image extent, never above 1.0.
    return min(float(pixels) / extent, 1.0)

  lefts, tops, rights, bottoms = [], [], [], []
  class_names, class_ids = [], []
  for box, box_label in zip(bbox, label):
    lefts.append(_capped(box[0], width))
    tops.append(_capped(box[1], height))
    rights.append(_capped(box[0]+box[2], width))
    bottoms.append(_capped(box[1]+box[3], height))
    name = get_class_name_from_label(box_label)
    class_names.append(name.encode('utf8'))
    class_ids.append(label_map_dict[name])

  file_bytes = image_file.encode('utf8')
  feature = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(file_bytes),
      'image/source_id': dataset_util.bytes_feature(file_bytes),
      'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(jpg_bytes),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
      'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
      'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
      'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
      'image/object/class/text': dataset_util.bytes_list_feature(class_names),
      'image/object/class/label': dataset_util.int64_list_feature(class_ids)
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
def prepare_example(image_path, annotations, label_map_dict):
  """Build a tf.Example from an image file and its object annotations.

  The image size is taken from the decoded pixel array, and the 2D box
  edges are divided by that width/height before being stored.

  Args:
    image_path: The complete path to image; also stored as the example's
      filename and source id.
    annotations: A dictionary of per-object fields ('type', 'truncated',
      'alpha', '2d_bbox_*', '3d_bbox_*'). The 2D box entries are divided by
      a scalar and passed to len(), so they are presumably NumPy arrays with
      one entry per object - verify against the caller.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
  with tf.gfile.GFile(image_path, 'rb') as fid:
    png_bytes = fid.read()
  pixels = np.asarray(pil.open(io.BytesIO(png_bytes)))

  digest = hashlib.sha256(png_bytes).hexdigest()

  # Array layout is (rows, columns, ...): rows give height, columns width.
  width = int(pixels.shape[1])
  height = int(pixels.shape[0])

  left = annotations['2d_bbox_left'] / float(width)
  top = annotations['2d_bbox_top'] / float(height)
  right = annotations['2d_bbox_right'] / float(width)
  bottom = annotations['2d_bbox_bottom'] / float(height)

  # No object is flagged as difficult.
  not_difficult = [0]*len(left)

  feature = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(png_bytes),
      'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(left),
      'image/object/bbox/xmax': dataset_util.float_list_feature(right),
      'image/object/bbox/ymin': dataset_util.float_list_feature(top),
      'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
  }
  feature['image/object/class/text'] = dataset_util.bytes_list_feature(
      [name.encode('utf8') for name in annotations['type']])
  feature['image/object/class/label'] = dataset_util.int64_list_feature(
      [label_map_dict[name] for name in annotations['type']])
  feature['image/object/difficult'] = dataset_util.int64_list_feature(
      not_difficult)
  feature['image/object/truncated'] = dataset_util.float_list_feature(
      annotations['truncated'])
  feature['image/object/alpha'] = dataset_util.float_list_feature(
      annotations['alpha'])
  # The 3D box fields are copied through unchanged, one feature per field.
  for field in ('height', 'width', 'length', 'x', 'y', 'z', 'rot_y'):
    feature['image/object/3d_bbox/' + field] = (
        dataset_util.float_list_feature(annotations['3d_bbox_' + field]))

  return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(example):
    """Build a tf.Example for one image of the Udacity real data set.

    Args:
        example: dict with 'filename' (path of the image file, whose bytes
            are stored as-is) and 'annotations' (iterable of dicts with
            'xmin', 'ymin', 'x_width', 'y_height' and 'class').

    Returns:
        The tf.train.Example holding the image bytes, the fixed image size
        and one normalized box plus class text/id per annotation. Class ids
        are looked up in the module-level LABEL_DICT.
    """
    # Udacity real data set: the size is hard-coded, not read from the file.
    height = 1096  # Image height
    width = 1368  # Image width

    # Filename of the image. Empty if image is not from file
    filename = example['filename'].encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode()

    xmins = []  # Normalized left x coordinates (1 per box)
    xmaxs = []  # Normalized right x coordinates (1 per box)
    ymins = []  # Normalized top y coordinates (1 per box)
    ymaxs = []  # Normalized bottom y coordinates (1 per box)
    classes_text = []  # String class name of bounding box (1 per box)
    classes = []  # Integer class id of bounding box (1 per box)

    for box in example['annotations']:
        # Convert to float BEFORE dividing: casting the quotient instead
        # lets integer inputs be floor-divided under Python 2.
        xmins.append(float(box['xmin']) / width)
        xmaxs.append(float(box['xmin'] + box['x_width']) / width)
        ymins.append(float(box['ymin']) / height)
        ymaxs.append(float(box['ymin'] + box['y_height']) / height)
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
    def create_tf_record(output_filename, num_shards, examples):
        """Write a tf.Example for every existing image into sharded TFRecords.

        Image names are resolved against `read_bucket` and their boxes are
        looked up in `annotations`; both names come from a scope outside
        this function.

        Args:
            output_filename: passed to open_sharded_output_tfrecords.
            num_shards: number of output shards; the example at position
                `idx` is written to shard `idx % num_shards`.
            examples: iterable of image file names.

        Raises:
            ValueError: if PIL does not report an image as JPEG. A
                ValueError raised while building or writing an example is
                caught and reported with print instead.
        """
        with contextlib2.ExitStack() as tf_record_close_stack:
            shard_writers = tf_record_creation_util.open_sharded_output_tfrecords(
                tf_record_close_stack, output_filename, num_shards)
            for idx, example in enumerate(examples):
                img_path = os.path.join(read_bucket, example)
                if not os.path.isfile(img_path):
                    # Names without a file on disk are skipped.
                    continue
                with tf.gfile.GFile(img_path, 'rb') as fid:
                    jpg_bytes = fid.read()
                image = PIL.Image.open(io.BytesIO(jpg_bytes))
                if image.format != 'JPEG':
                    raise ValueError('Image format not JPEG')
                digest = hashlib.sha256(jpg_bytes).hexdigest()

                width, height = image.size

                lefts, rights, tops, bottoms = [], [], [], []
                label_texts = []  # 'coke', 'pepsi', 'coke'...
                for annotation in annotations[example]:
                    # Coordinates are stored exactly as found in the annotation.
                    lefts.append(annotation['x'])
                    rights.append(annotation['x2'])
                    tops.append(annotation['y'])
                    bottoms.append(annotation['y2'])
                    label_texts.append(annotation['label'].encode('utf8'))

                box_count = len(label_texts)
                # Temporary: every box gets id 1 until labels map to real ids.
                label_ids = [1] * box_count
                difficult_flags = [0] * box_count
                truncated_flags = [0] * box_count
                views = [''.encode('utf8')] * box_count

                try:
                    features = tf.train.Features(feature={
                        'image/height': dataset_util.int64_feature(height),
                        'image/width': dataset_util.int64_feature(width),
                        'image/filename': dataset_util.bytes_feature(example.encode('utf8')),
                        'image/source_id': dataset_util.bytes_feature(example.encode('utf8')),
                        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
                        'image/encoded': dataset_util.bytes_feature(jpg_bytes),
                        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
                        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
                        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
                        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
                        'image/object/class/text': dataset_util.bytes_list_feature(label_texts),
                        'image/object/class/label': dataset_util.int64_list_feature(label_ids),
                        'image/object/difficult': dataset_util.int64_list_feature(difficult_flags),
                        'image/object/truncated': dataset_util.int64_list_feature(truncated_flags),
                        'image/object/view': dataset_util.bytes_list_feature(views)
                    })
                    tf_example = tf.train.Example(features=features)
                    if tf_example:
                        shard_writers[idx % num_shards].write(
                            tf_example.SerializeToString())
                except ValueError:
                    print('Invalid example, ignoring.')
Ejemplo n.º 46
0
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       image_subdirectory='JPEGImages'):
    """Convert an XML-derived annotation dict to a tf.Example proto.

    The image is looked up as ``<filename>.png`` first and ``<filename>.jpg``
    otherwise. Box coordinates are divided by the width/height recorded in
    ``data['size']``. Only objects whose name is in the module-level
    ``selected`` collection are kept, and their ids come from the
    module-level ``label_golden`` mapping.

    Args:
        data: dict with 'filename' (without extension), 'size' ('width' and
            'height') and optionally 'object' (a list of dicts with 'name'
            and 'bndbox').
        dataset_directory: root directory of the dataset.
        label_map_dict: not used by this function (ids are read from
            ``label_golden``); kept for existing callers.
        image_subdirectory: directory under `dataset_directory` that holds
            the images.

    Returns:
        The converted tf.train.Example.

    Raises:
        ValueError: if PIL reports a format other than JPEG or PNG.
    """
    full_path = os.path.join(dataset_directory, image_subdirectory,
                             data['filename'] + '.png')
    if not os.path.isfile(full_path):
        full_path = os.path.join(dataset_directory, image_subdirectory,
                                 data['filename'] + '.jpg')
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_image = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_image))
    if image.format not in ['JPEG', 'PNG']:
        raise ValueError('Image format not JPEG or PNG')
    # Record the format actually detected; PNG files used to be labelled
    # 'jpeg'. JPEG input still yields 'jpeg' exactly as before.
    image_format = image.format.lower().encode('utf8')
    key = hashlib.sha256(encoded_image).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    if 'object' not in data:
        print('No label detected in the xml format')
    else:
        for obj in data['object']:
            if obj['name'] not in selected:
                continue
            box = obj['bndbox']
            xmin.append(float(box['xmin']) / width)
            ymin.append(float(box['ymin']) / height)
            xmax.append(float(box['xmax']) / width)
            ymax.append(float(box['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_golden[obj['name']])

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return example
Ejemplo n.º 47
0
def create_tf_example(group, path, class_dict):
    """Build a tf.Example for one image and its annotation rows.

    Box coordinates are taken from the ``*_rel`` columns as-is when all four
    are present; otherwise the ``xmin``/``xmax``/``ymin``/``ymax`` columns
    are divided by the size of the decoded image.

    Args:
        group: object with ``filename`` (image file name) and ``object``
            (a DataFrame with one row per box, including a 'class' column).
        path: directory that contains the image file.
        class_dict: mapping from the 'class' column value to an integer id.

    Returns:
        The created tf.train.Example.

    Raises:
        ValueError: if a row has neither the relative nor the absolute set
            of coordinate columns.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    relative_columns = set(['xmin_rel', 'xmax_rel', 'ymin_rel', 'ymax_rel'])
    absolute_columns = set(['xmin', 'xmax', 'ymin', 'ymax'])

    for index, row in group.object.iterrows():
        columns = set(row.index)
        if relative_columns.issubset(columns):
            xmin = row['xmin_rel']
            xmax = row['xmax_rel']
            ymin = row['ymin_rel']
            ymax = row['ymax_rel']
        elif absolute_columns.issubset(columns):
            xmin = row['xmin'] / width
            xmax = row['xmax'] / width
            ymin = row['ymin'] / height
            ymax = row['ymax'] / height
        else:
            # Without this branch the coordinates were either unbound or
            # silently carried over from the previous row.
            raise ValueError(
                'Row {!r} of {!r} has neither relative nor absolute '
                'bounding box columns'.format(index, group.filename))

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_dict[row['class']])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Ejemplo n.º 48
0
# Integer id stored under 'image/object/class/label' for each class name.
BLUEMOON_CLASS_IDS = {
    'bluemoon_shouxi_fengqingbailai_500g': 1,
    'bluemoon_zhizun_suyalanxiang_660g': 2,
    'bluemoon_zhizun_suyalanxiang_box': 3,
    'bluemoon_zhizun_suyalanxiang_bag_600g': 4,
    'bluemoon_zhizun_qingyunmeixiang_box': 5,
    'bluemoon_zhizun_qingyunmeixiang_bag_600g': 6,
    'bluemoon_zhizun_qingyunmeixiang_660g': 7,
    'bluemoon_shouxi_shuangyong_fengqingbailai_500g': 8,
    'bluemoon_zhizun_new_suyalanxiang_660g': 9,
    'bluemoon_zhizun_new_qingyunmeixiang_660g': 10,
    'bluemoon_baobao_red_500g': 11,
    'bluemoon_jiecibao_500g': 12,
    'bluemoon_lanseyueguang_bai_600g': 13,
    'bluemoon_shenceng_500g': 14,
    'bluemoon_weinuo_500g': 15,
    'bluemoon_yurong_500g': 16,
    'bluemoon_ertongxishou_caomei_225g': 17,
    'bluemoon_ertongxishou_qingpingguo_225g': 18,
    'bluemoon_ertongxishou_tiancheng_225g': 19,
    'bluemoon_guopaoduoduo_300ml': 20,
    'bluemoon_xishouye_luhui_500g': 21,
    'bluemoon_xishouye_weie_500g': 22,
    'bluemoon_xishouye_yejuhua_500g': 23,
    'bluemoon_84xiaoduye_1.2kg': 24,
    'bluemoon_bolishui_500g': 25,
    'bluemoon_chaqing_500g': 26,
    'bluemoon_dibanqingjie_600g': 27,
    'bluemoon_piaobaishui_600g': 28,
    'bluemoon_quannengshui_500g': 29,
    'bluemoon_roushunji_500g': 30,
    'bluemoon_yilingjing_500g': 31,
    'bluemoon_yiwuxiaoduye_1kg': 32,
    'bluemoon_youwukexing_500g': 33,
    'bluemoon_liangbai_xunyicao_2kg': 34,
}


def create_tf_example(group, path):
    """Build a tf.Example for one image and its annotation rows.

    Box coordinates are divided by the 'width'/'height' columns of their own
    row. The stored image size is the one from the last row, or the size of
    the decoded image when the group has no rows.

    Args:
        group: object with ``filename`` (image file name) and ``object``
            (a DataFrame with the columns 'xmin', 'xmax', 'ymin', 'ymax',
            'width', 'height' and 'class').
        path: directory that contains the image file.

    Returns:
        The created tf.train.Example.

    Raises:
        ValueError: if a row's class is not in BLUEMOON_CLASS_IDS.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    # Fallback size for a group without rows; reading the loop variable
    # after an empty loop used to fail with a NameError.
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        class_name = row['class']
        class_id = BLUEMOON_CLASS_IDS.get(class_name)
        if class_id is None:
            # Fail loudly: previously the box and text were appended without
            # a label, leaving the per-box lists with different lengths.
            raise ValueError(
                'Unknown class {!r} in row {!r} of {!r}'.format(
                    class_name, index, group.filename))

        width = row['width']
        height = row['height']
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        # bytes_list_feature needs bytes, not str, on Python 3.
        classes_text.append(class_name.encode('utf8'))
        classes.append(class_id)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example