def testDecodeObjectGroupOf(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    object_group_of = [0, 1]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/group_of':
                    dataset_util.int64_list_feature(object_group_of),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual(
        (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
         .as_list()), [2])
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(
        [bool(item) for item in object_group_of],
        tensor_dict[fields.InputDataFields.groundtruth_group_of])
def create_tf_example(example, writer):
  height = example['height']
  width = example['width']
  filename = example['filename']
  encoded_image_data = example['encoded_image_data']
  image_format = example['image_format']

  bboxes = example['bbox']
  xmins = [bbox[0]/float(width) for bbox in bboxes] # List of normalized left x coordinates in bounding box (1 per box)
  xmaxs = [bbox[2]/float(width) for bbox in bboxes] # List of normalized right x coordinates in bounding box
  ymins = [bbox[1]/float(height) for bbox in bboxes] # List of normalized top y coordinates in bounding box (1 per box)
  ymaxs = [bbox[3]/float(height) for bbox in bboxes] # List of normalized bottom y coordinates in bounding box
  classes_text = example['class_text']
  classes = example['class_idx']

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
      'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature([bytes(t, "utf-8") for t in classes_text]),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  writer.write(tf_example.SerializeToString())
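# A minimal driver sketch (not part of the original script) for the
# create_tf_example(example, writer) helper above. The example dicts must
# carry the keys that helper reads; output_path is a placeholder.
def write_tfrecord(examples, output_path='train.record'):
    writer = tf.python_io.TFRecordWriter(output_path)
    for example in examples:
        # create_tf_example serializes and writes one record itself.
        create_tf_example(example, writer)
    writer.close()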
  def testDecodePngInstanceMasks(self):
    image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
    mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
    encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png')
    decoded_png_1 = np.squeeze(mask_1.astype(np.float32))
    encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png')
    decoded_png_2 = np.squeeze(mask_2.astype(np.float32))
    encoded_masks = [encoded_png_1, encoded_png_2]
    decoded_masks = np.stack([decoded_png_1, decoded_png_2])
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/mask':
                    dataset_util.bytes_list_feature(encoded_masks)
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(
        decoded_masks,
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
  def testDecodeJpegImage(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    decoded_jpeg = self._DecodeImage(encoded_jpeg)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'),
                'image/source_id': dataset_util.bytes_feature('image_id'),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
                         get_shape().as_list()), [None, None, 3])
    self.assertAllEqual((tensor_dict[fields.InputDataFields.
                                     original_image_spatial_shape].
                         get_shape().as_list()), [2])
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
    self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
                                            original_image_spatial_shape])
    self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
  def testDecodeEmptyPngInstanceMasks(self):
    image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    encoded_masks = []
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/mask':
                    dataset_util.bytes_list_feature(encoded_masks),
                'image/height':
                    dataset_util.int64_feature(10),
                'image/width':
                    dataset_util.int64_feature(10),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)
      self.assertAllEqual(
          tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape,
          [0, 10, 10])
  def testDecodeDefaultGroundtruthWeights(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    bbox_ymins = [0.0, 4.0]
    bbox_xmins = [1.0, 5.0]
    bbox_ymaxs = [2.0, 6.0]
    bbox_xmaxs = [3.0, 7.0]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(bbox_ymins),
                'image/object/bbox/xmin':
                    dataset_util.float_list_feature(bbox_xmins),
                'image/object/bbox/ymax':
                    dataset_util.float_list_feature(bbox_ymaxs),
                'image/object/bbox/xmax':
                    dataset_util.float_list_feature(bbox_xmaxs),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
                         .get_shape().as_list()), [None, 4])

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights],
                        np.ones(2, dtype=np.float32))
  def testDecodeObjectWeight(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    object_weights = [0.75, 1.0]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/weight':
                    dataset_util.float_list_feature(object_weights),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_weights]
                         .get_shape().as_list()), [None])
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(object_weights,
                        tensor_dict[fields.InputDataFields.groundtruth_weights])
def create_tf_example(row):
    full_path = os.path.join(os.getcwd(), 'images', '{}'.format(row['filename']))
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = row['filename'].encode('utf8')
    image_format = b'jpg'
    xmins = [row['xmin'] / width]
    xmaxs = [row['xmax'] / width]
    ymins = [row['ymin'] / height]
    ymaxs = [row['ymax'] / height]
    classes_text = [row['class'].encode('utf8')]
    classes = [class_text_to_int(row['class'])]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
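# A hypothetical driver for create_tf_example(row) above, assuming a labels
# CSV with one box per row and the columns the function reads (filename,
# xmin, xmax, ymin, ymax, class); both paths are placeholders.
import pandas as pd

def write_csv_records(csv_path='data/labels.csv', output_path='data/train.record'):
    writer = tf.python_io.TFRecordWriter(output_path)
    for _, row in pd.read_csv(csv_path).iterrows():
        writer.write(create_tf_example(row).SerializeToString())
    writer.close()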
  def testDecodeAdditionalChannels(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)

    additional_channel_tensor = np.random.randint(
        256, size=(4, 5, 1)).astype(np.uint8)
    encoded_additional_channel = self._EncodeImage(additional_channel_tensor)
    decoded_additional_channel = self._DecodeImage(encoded_additional_channel)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/additional_channels/encoded':
                    dataset_util.bytes_list_feature(
                        [encoded_additional_channel] * 2),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/source_id':
                    dataset_util.bytes_feature('image_id'),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder(
        num_additional_channels=2)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)
      self.assertAllEqual(
          np.concatenate([decoded_additional_channel] * 2, axis=2),
          tensor_dict[fields.InputDataFields.image_additional_channels])
  def testDecodeInstanceSegmentation(self):
    num_instances = 4
    image_height = 5
    image_width = 3

    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)

    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(2, size=(num_instances, image_height,
                                   image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
                    dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual(
        (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
         .get_shape().as_list()), [4, 5, 3])

    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
                         .get_shape().as_list()), [4])

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(
        instance_masks.astype(np.float32),
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
    self.assertAllEqual(object_classes,
                        tensor_dict[fields.InputDataFields.groundtruth_classes])
  def testDecodeKeypoint(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    bbox_ymins = [0.0, 4.0]
    bbox_xmins = [1.0, 5.0]
    bbox_ymaxs = [2.0, 6.0]
    bbox_xmaxs = [3.0, 7.0]
    keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
    keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(bbox_ymins),
                'image/object/bbox/xmin':
                    dataset_util.float_list_feature(bbox_xmins),
                'image/object/bbox/ymax':
                    dataset_util.float_list_feature(bbox_ymaxs),
                'image/object/bbox/xmax':
                    dataset_util.float_list_feature(bbox_xmaxs),
                'image/object/keypoint/y':
                    dataset_util.float_list_feature(keypoint_ys),
                'image/object/keypoint/x':
                    dataset_util.float_list_feature(keypoint_xs),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
                         .get_shape().as_list()), [None, 4])
    self.assertAllEqual(
        (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape()
         .as_list()), [2, 3, 2])
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
                                bbox_xmaxs]).transpose()
    self.assertAllEqual(expected_boxes,
                        tensor_dict[fields.InputDataFields.groundtruth_boxes])
    self.assertAllEqual(
        2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])

    expected_keypoints = (
        np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
    self.assertAllEqual(
        expected_keypoints,
        tensor_dict[fields.InputDataFields.groundtruth_keypoints])
  def testDecodeImageLabels(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'),
                'image/class/label': dataset_util.int64_list_feature([1, 2]),
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)
    self.assertTrue(
        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
    self.assertAllEqual(
        tensor_dict[fields.InputDataFields.groundtruth_image_classes],
        np.array([1, 2]))
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/class/text':
                    dataset_util.bytes_list_feature(['dog', 'cat']),
            })).SerializeToString()
    label_map_string = """
      item {
        id:3
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)
    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      tensor_dict = sess.run(tensor_dict)
    self.assertTrue(
        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
    self.assertAllEqual(
        tensor_dict[fields.InputDataFields.groundtruth_image_classes],
        np.array([1, 3]))
def dict_to_tf_example(data,
                       label_map_dict,
                       data_dir):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    """
  
    img_path = os.path.join(data_dir, data.replace("mask", "images"))
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = 512
    height = 512

    classes = []
    classes_text = []
    encoded_mask_png_list = []
    # cv2.imread returns a numpy array, which has no save() method; re-encode
    # the binary mask as PNG bytes via PIL instead.
    mask_png = cv2.imread(os.path.join(data_dir, data), 0) // 255
    output = io.BytesIO()
    PIL.Image.fromarray(mask_png.astype(np.uint8)).save(output, format='PNG')
    encoded_mask_png_list.append(output.getvalue())
    class_name = 'water'
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])


    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            data.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }

    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
def createTfExample(singleFileData, path):
    # use TensorFlow's GFile function to open the .jpg image matching the current box data
    with tf.gfile.GFile(os.path.join(path, '{}'.format(singleFileData.filename)), 'rb') as tensorFlowImageFile:
        tensorFlowImage = tensorFlowImageFile.read()
    # end with

    # get the image width and height via converting from a TensorFlow image to an io library BytesIO image,
    # then to a PIL Image, then breaking out the width and height
    bytesIoImage = io.BytesIO(tensorFlowImage)
    pilImage = Image.open(bytesIoImage)
    width, height = pilImage.size

    # get the file name from the file data passed in, and set the image format to .jpg
    fileName = singleFileData.filename.encode('utf8')
    imageFormat = b'jpg'

    # declare empty lists for the box x, y, mins and maxes, and the class as text and as an integer
    xMins = []
    xMaxs = []
    yMins = []
    yMaxs = []
    classesAsText = []
    classesAsInts = []

    # for each row in the current .xml file's data . . . (each row in the .xml file corresponds to one box)
    for index, row in singleFileData.object.iterrows():
        xMins.append(row['xmin'] / width)
        xMaxs.append(row['xmax'] / width)
        yMins.append(row['ymin'] / height)
        yMaxs.append(row['ymax'] / height)
        classesAsText.append(row['class'].encode('utf8'))
        classesAsInts.append(classAsTextToClassAsInt(row['class']))
    # end for

    # finally we can calculate and return the TensorFlow Example
    tfExample = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fileName),
        'image/source_id': dataset_util.bytes_feature(fileName),
        'image/encoded': dataset_util.bytes_feature(tensorFlowImage),
        'image/format': dataset_util.bytes_feature(imageFormat),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xMins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xMaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(yMins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(yMaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classesAsText),
        'image/object/class/label': dataset_util.int64_list_feature(classesAsInts)}))

    return tfExample
def create_tf_example(filename, writer):
    with open(filename) as f:
        lines = f.readlines()
    image_filename = lines[0].strip()[1:]
    classes_text = []
    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    im = Image.open(image_filename)
    arr = io.BytesIO()
    im.save(arr, format='PNG')
    height = im.height
    width = im.width
    encoded_image_data = arr.getvalue()
    image_format = 'png'
    for line in lines[1:]:
        line = line.strip()
        if line == '':
            continue
        data = line.split(",")
        bbox = list(map(int, map(float, data[:4])))
        class_text = data[4].strip()
        class_idx = labels.index(class_text)
        classes_text.append(class_text)
        classes.append(class_idx)
        xmins.append(bbox[0]/float(width))
        xmaxs.append(bbox[2]/float(width)) # List of normalized right x coordinates in bounding box
        ymins.append(bbox[1]/float(height)) # List of normalized top y coordinates in bounding box (1 per box)
        ymaxs.append(bbox[3]/float(height)) # List of normalized bottom y coordinates in bounding box


    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature([bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
  def testDecodeObjectLabelWithText(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    bbox_classes_text = ['cat', 'dog']
    # Annotation label gets overridden by labelmap id.
    annotated_bbox_classes = [3, 4]
    expected_bbox_classes = [1, 2]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
                'image/object/class/label':
                    dataset_util.int64_list_feature(annotated_bbox_classes),
            })).SerializeToString()
    label_map_string = """
      item {
        id:1
        name:'cat'
      }
      item {
        id:2
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)

    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    init = tf.tables_initializer()
    with self.test_session() as sess:
      sess.run(init)
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(expected_bbox_classes,
                        tensor_dict[fields.InputDataFields.groundtruth_classes])
  def testDecodeImageKeyAndFilename(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/key/sha256': dataset_util.bytes_feature('abc'),
                'image/filename': dataset_util.bytes_feature('filename')
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
    self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
  def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    bbox_classes_text = ['cat', 'cheetah']
    bbox_classes_id = [5, 6]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
                'image/object/class/label':
                    dataset_util.int64_list_feature(bbox_classes_id),
            })).SerializeToString()

    label_map_string = """
      item {
        name:'/m/cat'
        id:3
        display_name:'cat'
      }
      item {
        name:'/m/dog'
        id:1
        display_name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)
    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual([3, -1],
                        tensor_dict[fields.InputDataFields.groundtruth_classes])
def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
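# create_tf_example(group, path) expects one `group` per image, with all of
# that image's boxes in group.object. A grouping sketch, assuming a pandas
# DataFrame with a 'filename' column as in the CSV-driven snippets above:
import collections

def split(df, group_key='filename'):
    data = collections.namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group_key)
    return [data(filename, grouped.get_group(filename))
            for filename in grouped.groups]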
  def testInstancesNotAvailableByDefault(self):
    num_instances = 4
    image_height = 5
    image_width = 3
    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)

    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(2, size=(num_instances, image_height,
                                   image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
                    dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    self.assertTrue(
        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
  def testDecodeMultiClassScores(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    bbox_ymins = [0.0, 4.0]
    bbox_xmins = [1.0, 5.0]
    bbox_ymaxs = [2.0, 6.0]
    bbox_xmaxs = [3.0, 7.0]
    flattened_multiclass_scores = [100., 50.] + [20., 30.]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'),
                'image/object/class/multiclass_scores':
                    dataset_util.float_list_feature(
                        flattened_multiclass_scores),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(bbox_ymins),
                'image/object/bbox/xmin':
                    dataset_util.float_list_feature(bbox_xmins),
                'image/object/bbox/ymax':
                    dataset_util.float_list_feature(bbox_ymaxs),
                'image/object/bbox/xmax':
                    dataset_util.float_list_feature(bbox_xmaxs),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_multiclass_scores=True)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(flattened_multiclass_scores,
                        tensor_dict[fields.InputDataFields.multiclass_scores])
  def create_tf_record(self):
    path = os.path.join(self.get_temp_dir(), 'tfrecord')
    writer = tf.python_io.TFRecordWriter(path)

    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    flat_mask = (4 * 5) * [1.0]
    with self.test_session():
      encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/height': dataset_util.int64_feature(4),
        'image/width': dataset_util.int64_feature(5),
        'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
        'image/object/class/label': dataset_util.int64_list_feature([2]),
        'image/object/mask': dataset_util.float_list_feature(flat_mask),
    }))
    writer.write(example.SerializeToString())
    writer.close()

    return path
def create_mock_tfrecord():
  pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8), 'RGB')
  image_output_stream = StringIO.StringIO()
  pil_image.save(image_output_stream, format='png')
  encoded_image = image_output_stream.getvalue()

  feature_map = {
      'test_field':
          dataset_util.float_list_feature([1, 2, 3, 4]),
      standard_fields.TfExampleFields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  tf_example = tf.train.Example(features=tf.train.Features(feature=feature_map))
  with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
    writer.write(tf_example.SerializeToString())
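# A read-back sanity check for create_mock_tfrecord(), assuming the same
# get_mock_tfrecord_path() helper: iterate the written file and re-parse the
# single serialized tf.Example.
def read_mock_tfrecord():
  for record in tf.python_io.tf_record_iterator(get_mock_tfrecord_path()):
    tf_example = tf.train.Example()
    tf_example.ParseFromString(record)
    print(tf_example.features.feature['test_field'].float_list.value)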
def dict_to_tf_example(filename, mask_path, label_map_dict, img_path):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    filename: Name of the image.
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integer ids.
    img_path: String path to the JPEG encoded image.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by filename is not a valid JPEG
  """
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    width = np.asarray(image).shape[1]
    height = np.asarray(image).shape[0]
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_mask_png)
    mask = PIL.Image.open(encoded_png_io)
    mask_np = np.asarray(mask.convert('L'))
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    masks = []

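    # `mask_pixel` is assumed to be a module-level dict mapping each class
    # name to the pixel value that marks that class in the mask image.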
    for k in list(mask_pixel.keys()):
        class_name = k
        nonbackground_indices_x = np.any(mask_np == mask_pixel[class_name],
                                         axis=0)
        nonbackground_indices_y = np.any(mask_np == mask_pixel[class_name],
                                         axis=1)
        nonzero_x_indices = np.where(nonbackground_indices_x)
        nonzero_y_indices = np.where(nonbackground_indices_y)

        if np.asarray(nonzero_x_indices).shape[1] > 0 and np.asarray(
                nonzero_y_indices).shape[1] > 0:
            xmin = float(np.min(nonzero_x_indices))
            xmax = float(np.max(nonzero_x_indices))
            ymin = float(np.min(nonzero_y_indices))
            ymax = float(np.max(nonzero_y_indices))
            print(filename, 'bounding box for', class_name, xmin, xmax, ymin,
                  ymax)

            xmins.append(xmin / width)
            ymins.append(ymin / height)
            xmaxs.append(xmax / width)
            ymaxs.append(ymax / height)

            classes_text.append(class_name.encode('utf8'))
            classes.append(label_map_dict[class_name])

            mask_remapped = (mask_np == mask_pixel[class_name]).astype(
                np.uint8)
            masks.append(mask_remapped)

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }

    encoded_mask_png_list = []
    for mask in masks:
        img = PIL.Image.fromarray(mask)
        output = io.BytesIO()
        img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def create_tf_record(output_filename, num_shards, examples):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack,
            output_filename,
            num_shards)
        for idx, example in enumerate(examples):
            img_path = os.path.join(read_bucket, example)
            if not os.path.isfile(img_path):
                continue
            with tf.gfile.GFile(img_path, 'rb') as fid:
                encoded_jpg = fid.read()
            encoded_jpg_io = io.BytesIO(encoded_jpg)
            image = PIL.Image.open(encoded_jpg_io)
            if image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            key = hashlib.sha256(encoded_jpg).hexdigest()

            width, height = image.size

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []  # 'coke', 'pepsi', 'coke'...
            classes = []  # 1, 2, 1...
            difficult_obj = []
            truncated = []
            poses = []

            for annotation in annotations[example]:
                if 'x' in annotation and 'x2' in annotation and 'y' in annotation and 'y2' in annotation:
                    xmins.append(annotation['x'])
                    xmaxs.append(annotation['x2'])
                    ymins.append(annotation['y'])
                    ymaxs.append(annotation['y2'])
                    classes_text.append(annotation['label'].encode('utf8'))
                    classes.append(1)  # temporary, I need to assign labels to actual ids
                    difficult_obj.append(0)
                    truncated.append(0)
                    poses.append(''.encode('utf8'))

            try:
                feature_dict = {
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/source_id': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label': dataset_util.int64_list_feature(classes),
                    'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                    'image/object/truncated': dataset_util.int64_list_feature(truncated),
                    'image/object/view': dataset_util.bytes_list_feature(poses)
                }
                tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
                shard_idx = idx % num_shards
                output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                print('Invalid example, ignoring.')
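# Example invocation (placeholders): with open_sharded_output_tfrecords,
# num_shards=10 yields train.record-00000-of-00010 through
# train.record-00009-of-00010; `annotations` and `read_bucket` are globals
# of the surrounding script.
create_tf_record('train.record', 10, sorted(annotations.keys()))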
def create_tf_example(img_filename, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(img_filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    match = re.match(r'(.*)\.jpg', img_filename)
    filename = match.group(1)

    img_filename = img_filename.encode('utf8')
    img_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    label_filename = os.path.join(FLAGS.txt_input, filename + '.txt')
    with open(label_filename, 'r') as f:
        labels = f.readline()

    match = re.match(r'(.*)\s(.*)\s(.*)\s(.*)\s(.*)\s', labels)
    label = match.group(1)
    xmin = float(match.group(2))
    ymin = float(match.group(3))
    xmax = float(match.group(4))
    ymax = float(match.group(5))

    xmins.append(xmin / width)
    xmaxs.append(xmax / width)
    ymins.append(ymin / height)
    ymaxs.append(ymax / height)
    classes_text.append(label.encode('utf8'))
    classes.append(class_text_to_int(label))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(img_filename),  # encoded above; bytes_feature needs bytes
            'image/source_id':
            dataset_util.bytes_feature(img_filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(img_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def dict_to_tf_example(split_data_dir,
                       name,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert Munich datset to tf.Example proto.
    Convert the image "name" of the Munich dataset (train/val) into record file
    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    IMG_TYPE = '.jpg'
    GT_EXT = '_gt.txt'
    file_name = name + IMG_TYPE
    img_path = os.path.join(split_data_dir, file_name)
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()
    # Get the image shape information
    tmp_img = cv2.imread(img_path)
    height, width, depth = tmp_img.shape

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    # Get the ground truth bounding box file
    gt_filename = os.path.join(split_data_dir, name + GT_EXT)
    with open(gt_filename) as f:
        lines = f.read().splitlines()
    for line in lines:
        difficult_obj.append(int(0))
        line_float = [float(item) for item in line.split(',')]
        label = int(line_float[-1])
        xmin.append(line_float[0] / width)  # xmin
        ymin.append(line_float[1] / height)  # ymin
        xmax.append(line_float[2] / width)  # xmax
        ymax.append(line_float[3] / height)  # ymax
        classes_text.append(OBJ_NAME[label].encode('utf8'))
        classes.append(USE_LABEL[label])
        # Add zeros for truncated (not being used for Munich dataset)
        truncated.append(0)
        poses.append('none'.encode('utf8'))
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(file_name.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(file_name.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
def dict_to_tf_example(data,
                       images_dir,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       keep_empty_image=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      dataset_directory: Path to root directory holding PASCAL dataset
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    full_path = os.path.join(images_dir, data['filename'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format == 'PNG' or image.size[0] > 1920:
        image.thumbnail((1920, 1920), PIL.Image.ANTIALIAS)
        temp_file = io.BytesIO()
        image.save(temp_file, format="jpeg")

        temp_file.seek(0)
        encoded_jpg = temp_file.read()
        encoded_jpg_io = io.BytesIO(encoded_jpg)
        image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            if obj['name'] in custom_label_map:
                obj['name'] = custom_label_map.get(obj['name'])

            if not obj['name']:
                continue
            elif FLAGS.skip_category and obj['name'] in set(FLAGS.skip_category.split(',')):
                continue
            difficult_obj.append(int(difficult))
            obj['name'] = obj['name'].lower()
            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))
            label_count[obj['name']] = label_count.get(obj['name'], 0) + 1
        if len(data['object']) > 0:
            label_count['total'] += 1
    if len(classes) == 0 and not keep_empty_image:
        return
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
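# A hedged driver for dict_to_tf_example above, following the usual TF Object
# Detection XML workflow: parse a PASCAL-style annotation into `data` with
# dataset_util.recursive_parse_xml_to_dict. Paths are placeholders.
from lxml import etree

def example_from_xml(xml_path, images_dir, label_map_dict):
    with tf.gfile.GFile(xml_path, 'r') as fid:
        xml = etree.fromstring(fid.read())
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    return dict_to_tf_example(data, images_dir, label_map_dict)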
def create_tf_example(group, path, class_dict):
    import tensorflow as tf
    from object_detection.utils import dataset_util

    with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                           'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        if set(['xmin_rel', 'xmax_rel', 'ymin_rel',
                'ymax_rel']).issubset(set(row.index)):
            xmin = row['xmin_rel']
            xmax = row['xmax_rel']
            ymin = row['ymin_rel']
            ymax = row['ymax_rel']

        elif set(['xmin', 'xmax', 'ymin', 'ymax']).issubset(set(row.index)):
            xmin = row['xmin'] / width
            xmax = row['xmax'] / width
            ymin = row['ymin'] / height
            ymax = row['ymax'] / height

        else:
            # Without this guard, a row lacking both column sets would
            # silently reuse the previous row's coordinates.
            raise ValueError('row needs either absolute (xmin/xmax/ymin/ymax) '
                             'or relative (*_rel) box columns')

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_dict[row['class']])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
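create_tf_example above expects a group object with a .filename attribute and an .object data frame of its boxes, the layout produced by the well-known split helper over a per-box CSV. A sketch of that driver, assuming hypothetical file names labels.csv, images, and train.record, and a class_dict defined elsewhere:

import collections

import pandas as pd
import tensorflow as tf

def split(df, group_key):
    # One namedtuple per image: .filename plus the sub-frame of its boxes.
    data = collections.namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group_key)
    return [data(filename, grouped.get_group(filename))
            for filename in grouped.groups]

examples = pd.read_csv('labels.csv')  # one row per bounding box (assumed)
writer = tf.python_io.TFRecordWriter('train.record')
for group in split(examples, 'filename'):
    writer.write(create_tf_example(group, 'images', class_dict)
                 .SerializeToString())
writer.close()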
Example #30
    img_label = cv2.imread(label)
    img_mask = image2label(img_label)
    encoded_label = img_mask.astype(np.uint8).tobytes()

    height, width = img_label.shape[0], img_label.shape[1]
    if height < vgg_16.default_image_size or width < vgg_16.default_image_size:
        # Make sure the image is large enough for the final random crop.
        print('image smaller than default_image_size (%d), skipping'
              % vgg_16.default_image_size)
        return None
    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_data),
        'image/label': dataset_util.bytes_feature(encoded_label),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
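image2label and encoded_data come from elsewhere in this script; the mask encoding only needs image2label to turn the color-coded label image into a 2-D array of class indices. A minimal sketch under that assumption, with a placeholder two-entry palette:

import numpy as np

PALETTE = {(0, 0, 0): 0, (0, 0, 128): 1}  # assumed BGR color -> class index

def image2label(img_label):
    # Map each palette color in the HxWx3 label image to its class index.
    mask = np.zeros(img_label.shape[:2], dtype=np.uint8)
    for color, idx in PALETTE.items():
        mask[np.all(img_label == color, axis=-1)] = idx
    return mask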
Example #31
def prepare_example(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

    Args:
      image_path: The complete path to image.
      annotations: A dictionary representing the annotation of a single object
        that appears in the image.
      label_map_dict: A map from string label names to integer ids.

    Returns:
      example: The converted tf.Example.
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    image = np.asarray(image)

    key = hashlib.sha256(encoded_png).hexdigest()

    width = int(image.shape[1])
    height = int(image.shape[0])

    xmin_norm = annotations['2d_bbox_left'] / float(width)
    ymin_norm = annotations['2d_bbox_top'] / float(height)
    xmax_norm = annotations['2d_bbox_right'] / float(width)
    ymax_norm = annotations['2d_bbox_bottom'] / float(height)

    difficult_obj = [0]*len(xmin_norm)

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [x.encode('utf8') for x in annotations['type']]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [label_map_dict[x] for x in annotations['type']]),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.float_list_feature(
            annotations['truncated']),
        'image/object/alpha': dataset_util.float_list_feature(
            annotations['alpha']),
        'image/object/3d_bbox/height': dataset_util.float_list_feature(
            annotations['3d_bbox_height']),
        'image/object/3d_bbox/width': dataset_util.float_list_feature(
            annotations['3d_bbox_width']),
        'image/object/3d_bbox/length': dataset_util.float_list_feature(
            annotations['3d_bbox_length']),
        'image/object/3d_bbox/x': dataset_util.float_list_feature(
            annotations['3d_bbox_x']),
        'image/object/3d_bbox/y': dataset_util.float_list_feature(
            annotations['3d_bbox_y']),
        'image/object/3d_bbox/z': dataset_util.float_list_feature(
            annotations['3d_bbox_z']),
        'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
            annotations['3d_bbox_rot_y']),
    }))

    return example
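prepare_example indexes annotations with array-valued, KITTI-style keys (one entry per object). A sketch of the assumed layout; all numbers are placeholders:

import numpy as np

annotations = {
    'type': ['car', 'pedestrian'],
    '2d_bbox_left': np.array([100.0, 250.0]),
    '2d_bbox_top': np.array([120.0, 90.0]),
    '2d_bbox_right': np.array([180.0, 300.0]),
    '2d_bbox_bottom': np.array([200.0, 240.0]),
    'truncated': np.array([0.0, 0.1]),
    'alpha': np.array([-1.2, 0.4]),
    '3d_bbox_height': np.array([1.5, 1.8]),
    '3d_bbox_width': np.array([1.6, 0.6]),
    '3d_bbox_length': np.array([3.9, 0.8]),
    '3d_bbox_x': np.array([2.1, -3.0]),
    '3d_bbox_y': np.array([1.0, 1.2]),
    '3d_bbox_z': np.array([15.0, 22.0]),
    '3d_bbox_rot_y': np.array([0.01, 1.57]),
}
example = prepare_example('images/000001.png', annotations, label_map_dict)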
Example #32
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(data['folder'], data['filename'] + '.jpg')
  full_path = os.path.join(dataset_directory, img_path)

  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  if 'object' in data:
    for obj in data['object']:
      difficult = bool(int(obj['difficult']))
      if ignore_difficult_instances and difficult:
        continue

      difficult_obj.append(int(difficult))
      brands[obj['name']] = brands.get(obj['name'], 0) + 1
      xmin.append(float(obj['bndbox']['xmin']) / width)
      ymin.append(float(obj['bndbox']['ymin']) / height)
      xmax.append(float(obj['bndbox']['xmax']) / width)
      ymax.append(float(obj['bndbox']['ymax']) / height)
      classes_text.append(obj['name'].encode('utf8'))
      if obj['name'] in label_map_dict:
        classes.append(label_map_dict[obj['name']])
      else:
        print('WARNING: label %s missing from label map in %s'
              % (obj['name'], full_path))
      truncated.append(int(obj['truncated']))
      poses.append(obj['pose'].encode('utf8'))
  examples_list_number_classes_text[os.path.splitext(full_path)[0]] = len(classes_text)
  examples_list_number_classes[os.path.splitext(full_path)[0]] = len(classes)

  logging.info('%s %s %s %s %s %s %s %s/%s', xmin, ymin, xmax, ymax,
               classes_text, classes, poses, data['folder'], data['filename'])
  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          (data['folder'] + '/' + data['filename']).encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          (data['folder'] + '/' + data['filename']).encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  if (examples_list_number_classes_text[os.path.splitext(full_path)[0]] !=
      examples_list_number_classes[os.path.splitext(full_path)[0]]):
    print(full_path, example)
  if len(classes_text) == 0:
    print(full_path, example)
    print(full_path,
          examples_list_number_classes_text[os.path.splitext(full_path)[0]],
          examples_list_number_classes[os.path.splitext(full_path)[0]])
    # Visualize images that ended up with no annotations so they can be
    # inspected by hand.
    img = cv2.imread(full_path)
    if xmin:
      # The coordinates were normalized above, so scale back to pixels.
      cv2.rectangle(img,
                    (int(xmin[0] * width), int(ymin[0] * height)),
                    (int(xmax[0] * width), int(ymax[0] * height)),
                    (255, 0, 0), 2)
    # Convert BGR to RGB for matplotlib.
    cv_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(cv_rgb)
    plt.show()
  return example
Example #33
def create_tf_example(
    example,
    size=None,
    label_map_dict=label_map_util.get_label_map_dict(LABEL_MAP_PATH)):
    img_file, boxes = example['img_file'], example['boxes']

    img = PIL.Image.open(img_file)

    width, height = img.size

    img_class_text = 'hand'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for box in boxes:
        if box is not None:
            xmins.append(float(box['x_min'] + 1) / width)
            xmaxs.append(float(box['x_max'] + 1) / width)
            ymins.append(float(box['y_min'] + 1) / height)
            ymaxs.append(float(box['y_max'] + 1) / height)
            classes_text.append(img_class_text.encode('utf-8'))
            classes.append(label_map_dict[img_class_text])

    if len(xmins) == 0:
        return None

    if size is not None:
        width, height = size

        assert (width > 0 and height > 0)

        img = img.resize((width, height))

    with io.BytesIO() as output:
        img.save(output, 'JPEG')
        img_encoded = output.getvalue()
    img_encoded_format = b'jpeg'

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(img_file.encode('utf-8')),
            'image/source_id':
            dataset_util.bytes_feature(img_file.encode('utf-8')),
            'image/encoded':
            dataset_util.bytes_feature(img_encoded),
            'image/format':
            dataset_util.bytes_feature(img_encoded_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes)
        }))

    return tf_example
Example #34
def dict_to_tf_example(data_dict, label_map_dict, data_dir):
    """
    Creates training example object (see tf.train.Example)

    Arguments:
        data_dict       dictionary created from a PASCAL VOC annotation file
        label_map_dict  dictionary mapping class names to integer class ids
        data_dir        directory containing the image files

    Returns:
        A tf.train.Example object containing bounding box annotation data
        as well as encoded JPEG data.
    """

    # Extract information from dictionary
    image_filename = data_dict['filename']
    width = int(data_dict['size']['width'])
    height = int(data_dict['size']['height'])

    # Get JPEG data as encoded bytes
    image_path = os.path.join(data_dir, image_filename)
    encoded_jpg = get_encoded_jpeg(image_path)

    # Create array of class labels for the annotations (i.e., bounding boxes)
    # associated with this training example
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    class_ids = []
    class_names = []
    if 'object' in data_dict:
        for obj in data_dict['object']:
            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            class_ids.append(label_map_dict[obj['name']])
            class_names.append(obj['name'].encode('utf8'))

    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Create training example
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(image_filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(image_filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
            'image/object/class/label':
            dataset_util.int64_list_feature(class_ids)
        }))
    return example
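get_encoded_jpeg is referenced above but not shown. A minimal sketch, assuming it simply returns the raw file bytes that dataset_util.bytes_feature expects for 'image/encoded':

import tensorflow as tf

def get_encoded_jpeg(image_path):
    # Read the JPEG file verbatim; no decoding is needed for the record.
    with tf.gfile.GFile(image_path, 'rb') as fid:
        return fid.read()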
Example #35
def image_to_tf_data(img_path, mask_path, xml_path, label_map_dict, filename):
    """Convert image and annotations to tf.tf_data proto.

    Note: if an image contains more than one object from same class
        then xmls files with bounding box annotation need to be provided

    Args:
      img_path: String specifying subdirectory within the dataset directory holding the actual image data.
      mask_path: String path to PNG encoded mask.
      xml_path: String path to XML file holding bounding box annotations
      label_map_dict: A map from string label names to integers ids.
      filename: Name of the image

    Returns:
      example: The converted tf.tf_data

    Raises:
      ValueError: if the image pointed to by filename is not a valid JPEG
    """
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    width = np.asarray(image).shape[1]
    height = np.asarray(image).shape[0]
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_mask_png)
    mask = PIL.Image.open(encoded_png_io)
    mask_np = np.asarray(mask.convert('L'))
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')

    data = []
    classes = []
    classes_text = []
    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    encoded_mask_png_list = []

    if FLAGS.bboxes_provided:
        if not os.path.exists(xml_path):
            logging.warning('Could not find %s, ignoring example.', xml_path)
            return
        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        if 'object' in data:
            for obj in data['object']:
                class_name = obj['name']
                pixel_val = int(label_map_dict[class_name][1])
                xmin = float(obj['bndbox']['xmin'])
                xmax = float(obj['bndbox']['xmax'])
                ymin = float(obj['bndbox']['ymin'])
                ymax = float(obj['bndbox']['ymax'])
                print(filename, 'bounding box for', class_name, xmin, xmax,
                      ymin, ymax)
                xmins.append(xmin / width)
                ymins.append(ymin / height)
                xmaxs.append(xmax / width)
                ymaxs.append(ymax / height)

                classes_text.append(class_name.encode('utf8'))
                classes.append(label_map_dict[class_name][0])

                mask_np_black = mask_np * 0
                mask_np_black[int(ymin):int(ymax),
                              int(xmin):int(xmax)] = mask_np[
                                  int(ymin):int(ymax),
                                  int(xmin):int(xmax)]
                mask_remapped = (mask_np_black == pixel_val).astype(np.uint8)
                img = PIL.Image.fromarray(mask_remapped)
                output = io.BytesIO()
                img.save(output, format='PNG')
                encoded_mask_png_list.append(output.getvalue())
    else:
        # Use a distinct loop variable: reusing `key` would shadow the sha256
        # digest computed above, which is written into the feature dict below.
        for class_name in label_map_dict:
            pixel_val = int(label_map_dict[class_name][1])
            nonbackground_indices_x = np.any(mask_np == pixel_val, axis=0)
            nonbackground_indices_y = np.any(mask_np == pixel_val, axis=1)
            nonzero_x_indices = np.where(nonbackground_indices_x)
            nonzero_y_indices = np.where(nonbackground_indices_y)

            if np.asarray(nonzero_x_indices).shape[1] > 0 and np.asarray(
                    nonzero_y_indices).shape[1] > 0:
                xmin = float(np.min(nonzero_x_indices))
                xmax = float(np.max(nonzero_x_indices))
                ymin = float(np.min(nonzero_y_indices))
                ymax = float(np.max(nonzero_y_indices))
                print(filename, 'bounding box for', class_name, xmin, xmax,
                      ymin, ymax)

                xmins.append(xmin / width)
                ymins.append(ymin / height)
                xmaxs.append(xmax / width)
                ymaxs.append(ymax / height)

                classes_text.append(class_name.encode('utf8'))
                classes.append(label_map_dict[class_name][0])

                mask_remapped = (mask_np == pixel_val).astype(np.uint8)
                img = PIL.Image.fromarray(mask_remapped)
                output = io.BytesIO()
                img.save(output, format='PNG')
                encoded_mask_png_list.append(output.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/mask':
        dataset_util.bytes_list_feature(encoded_mask_png_list)
    }
    tf_data = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return tf_data
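Unlike the other converters here, this one reads label_map_dict[class_name][0] for the class id and [1] for the pixel value that marks the class in the mask, so the map must carry a pair per class. A sketch of that assumed structure with placeholder values:

# Assumed layout: class name -> (integer class id, mask pixel value).
label_map_dict = {
    'cat': (1, 100),  # pixels equal to 100 in the PNG mask belong to 'cat'
    'dog': (2, 200),
}

tf_data = image_to_tf_data('imgs/0001.jpg', 'masks/0001.png',
                           'xmls/0001.xml', label_map_dict, '0001.jpg')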
Example #36
      ymins.append(ymin / height)
      xmaxs.append(xmax / width)
      ymaxs.append(ymax / height)
      class_name = get_class_name_from_filename(data['filename'])
      classes_text.append(class_name.encode('utf8'))
      classes.append(label_map_dict[class_name])
      truncated.append(int(obj['truncated']))
      poses.append(obj['pose'].encode('utf8'))
      #if not faces_only:
      #  mask_remapped = (mask_np != 2).astype(np.uint8)
      #  masks.append(mask_remapped)

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }
Example #37
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
  full_path = os.path.join(dataset_directory, img_path)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue

    difficult_obj.append(int(difficult))

    xmin.append(float(obj['bndbox']['xmin']) / width)
    ymin.append(float(obj['bndbox']['ymin']) / height)
    xmax.append(float(obj['bndbox']['xmax']) / width)
    ymax.append(float(obj['bndbox']['ymax']) / height)
    classes_text.append(obj['name'].encode('utf8'))
    classes.append(label_map_dict[obj['name']])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return example
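The data dict consumed above is produced by parsing a PASCAL VOC XML file. A sketch of the usual front end (the annotation path is a placeholder):

from lxml import etree

import tensorflow as tf
from object_detection.utils import dataset_util

with tf.gfile.GFile('VOCdevkit/VOC2012/Annotations/000001.xml', 'r') as fid:
    xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
example = dict_to_tf_example(data, 'VOCdevkit', label_map_dict)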
Example #38
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image.
    label_map: String to integer label map.
    encoded_image: The encoded image string

  Returns:
    The populated TF Example, if the label of at least one object is present in
    label_map. Otherwise, returns None.
  """

    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]

    image_id = annotations_data_frame.ImageID.iloc[0]

    feature_map = {
        standard_fields.TfExampleFields.object_bbox_ymin:
        dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_xmin:
        dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_ymax:
        dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
        standard_fields.TfExampleFields.object_bbox_xmax:
        dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
        standard_fields.TfExampleFields.object_class_text:
        dataset_util.bytes_list_feature(
            filtered_data_frame.LabelName.as_matrix()),
        standard_fields.TfExampleFields.object_class_label:
        dataset_util.int64_list_feature(
            filtered_data_frame.LabelName.map(
                lambda x: label_map[x]).as_matrix()),
        standard_fields.TfExampleFields.filename:
        dataset_util.bytes_feature('{}.jpg'.format(image_id)),
        standard_fields.TfExampleFields.source_id:
        dataset_util.bytes_feature(image_id),
        standard_fields.TfExampleFields.image_encoded:
        dataset_util.bytes_feature(encoded_image),
    }

    if 'IsGroupOf' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_group_of] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsGroupOf.as_matrix().astype(int))
    if 'IsOccluded' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_occluded] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsOccluded.as_matrix().astype(int))
    if 'IsTruncated' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_truncated] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsTruncated.as_matrix().astype(
                            int))
    if 'IsDepiction' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_depiction] = dataset_util.int64_list_feature(
                        filtered_data_frame.IsDepiction.as_matrix().astype(
                            int))

    return tf.train.Example(features=tf.train.Features(feature=feature_map))
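This converter takes all annotation rows for one image, so a driver typically groups the Open Images CSV by ImageID. A sketch under that assumption; image_dir, label_map, and writer are assumed to be defined elsewhere:

import os

import pandas as pd
import tensorflow as tf

annotations = pd.read_csv('train-annotations-bbox.csv')  # assumed OID CSV
for image_id, image_annotations in annotations.groupby('ImageID'):
    with tf.gfile.GFile(os.path.join(image_dir, image_id + '.jpg'), 'rb') as f:
        encoded_image = f.read()
    example = tf_example_from_annotations_data_frame(
        image_annotations, label_map, encoded_image)
    if example is not None:  # per the docstring, None when no label matches
        writer.write(example.SerializeToString())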
Example #39
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
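A typical driver for this COCO converter loads the groundtruth JSON, builds the category index with label_map_util, and groups annotations by image id. A sketch with placeholder paths; writer is assumed to be an open TFRecordWriter:

import json

import tensorflow as tf
from object_detection.utils import label_map_util

with tf.gfile.GFile('annotations/instances_val2017.json', 'r') as fid:
    groundtruth = json.load(fid)
category_index = label_map_util.create_category_index(
    groundtruth['categories'])

annotations_index = {}
for annotation in groundtruth['annotations']:
    annotations_index.setdefault(annotation['image_id'], []).append(annotation)

for image in groundtruth['images']:
    _, example, num_skipped = create_tf_example(
        image, annotations_index.get(image['id'], []),
        'val2017', category_index, include_masks=True)
    writer.write(example.SerializeToString())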
Example #40
def dict_to_tf_example(data,
               label_map_dict,
               image_subdirectory,
               ignore_difficult_instances=False):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []

  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
        continue
    difficult_obj.append(int(difficult))
    xmin = float(obj['bndbox']['xmin'])
    xmax = float(obj['bndbox']['xmax'])
    ymin = float(obj['bndbox']['ymin'])
    ymax = float(obj['bndbox']['ymax'])

    xmins.append(xmin / width)
    ymins.append(ymin / height)
    xmaxs.append(xmax / width)
    ymaxs.append(ymax / height)

    classes_text.append(obj['name'].encode('utf8'))
    classes.append(label_map_dict[obj['name']])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  feature_dict = {
     'image/height': dataset_util.int64_feature(height),
     'image/width': dataset_util.int64_feature(width),
     'image/filename': dataset_util.bytes_feature(
         data['filename'].encode('utf8')),
     'image/source_id': dataset_util.bytes_feature(
         data['filename'].encode('utf8')),
     'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
     'image/encoded': dataset_util.bytes_feature(encoded_jpg),
     'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
     'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
     'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
     'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
     'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
     'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
     'image/object/class/label': dataset_util.int64_list_feature(classes),
     'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
     'image/object/truncated': dataset_util.int64_list_feature(truncated),
     'image/object/view': dataset_util.bytes_list_feature(poses),
    }
  
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
Example #41
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    faces_only: If True, generates only bounding boxes.  Otherwise also
      generates segmentation masks for the full objects.
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])
    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # NOTE: nothing below appends to `masks`, so the faces_only=False branch
    # only works once it is populated (see the sketch after this example).
    masks = []

    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue
        difficult_obj.append(int(difficult))

        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])

        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)
        class_name = get_class_name_from_filename(data['filename'])
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    feature_dict = {
        'image/height':
        dataset_util.int64_feature(height),
        'image/width':
        dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated),
        'image/object/view':
        dataset_util.bytes_list_feature(poses),
    }
    if not faces_only:
        if mask_type == 'numerical':
            mask_stack = np.stack(masks).astype(np.float32)
            masks_flattened = np.reshape(mask_stack, [-1])
            feature_dict['image/object/mask'] = (
                dataset_util.float_list_feature(masks_flattened.tolist()))
        elif mask_type == 'png':
            encoded_mask_png_list = []
            for mask in masks:
                img = PIL.Image.fromarray(mask)
                output = io.BytesIO()
                img.save(output, format='PNG')
                encoded_mask_png_list.append(output.getvalue())
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
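As noted above, masks stays empty in this snippet. In the pet-dataset script this converter derives from, each mask is cut from the decoded trimap PNG inside the object loop; a sketch of that missing step, assuming the trimap convention where pixel value 2 is background:

import io

import numpy as np
import PIL.Image
import tensorflow as tf

# Inside the per-object loop, before feature_dict is built:
with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
mask_np = np.asarray(PIL.Image.open(io.BytesIO(encoded_mask_png)))
if not faces_only:
    mask_remapped = (mask_np != 2).astype(np.uint8)  # 2 == background (assumed)
    masks.append(mask_remapped)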
Example #42
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list:
        list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
        u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner.  This function converts
        to the format expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed
        by the 'id' field of each category.  See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
    Returns:
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
Example #43
def dict_to_tf_example(labels_corners, labels_center, labels_data, params,
                       label_map_dict, image_dir, image_prefix,
                       image_prev_prefix):

    width = round(params['pointcloud_grid_map_interface']['grids']['cartesian']
                  ['range']['y'] / params['pointcloud_grid_map_interface']
                  ['grids']['cartesian']['resolution']['y'])
    height = round(
        params['pointcloud_grid_map_interface']['grids']['cartesian']['range']
        ['x'] / params['pointcloud_grid_map_interface']['grids']['cartesian']
        ['resolution']['x'])
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    x_c = []
    y_c = []
    w = []
    h = []
    angle = []
    sin_angle = []
    cos_angle = []
    classes = []
    classes_text = []

    for idx, label_corner in enumerate(labels_corners):
        x_min = min(label_corner[0]) / width
        y_min = min(label_corner[1]) / height
        x_max = max(label_corner[0]) / width
        y_max = max(label_corner[1]) / height
        num_detections = _count_number_detections(image_dir, image_prefix,
                                                  (x_min, y_min, x_max, y_max),
                                                  params)
        if num_detections == 0:
            continue
        xmin.append(x_min)
        ymin.append(y_min)
        xmax.append(x_max)
        ymax.append(y_max)
        if (x_min >= 1) or (y_min >= 1) or (x_max >= 1) or (y_max >= 1):
            print(x_min, y_min, x_max, y_max)
            raise ValueError('Box Parameters greather than 1.0')
        if (x_min <= 0) or (y_min <= 0) or (x_max <= 0) or (y_max <= 0):
            raise ValueError('Box Parameters less than 0.0')
        x_c.append(labels_center[idx][0])
        y_c.append(labels_center[idx][1])
        angle_rad = labels_data[idx].rz
        angle.append(angle_rad)
        sin_angle.append(math.sin(2 * angle_rad))
        cos_angle.append(math.cos(2 * angle_rad))
        vec_s_x = math.cos(angle_rad)
        vec_s_y = math.sin(angle_rad)
        w_p = labels_data[idx].w / params['pointcloud_grid_map_interface'][
            'grids']['cartesian']['resolution']['y']
        w_p_s = w_p * math.sqrt(vec_s_x * vec_s_x /
                                (width * width) + vec_s_y * vec_s_y /
                                (height * height))
        w.append(w_p_s)
        l_p = labels_data[idx].l / params['pointcloud_grid_map_interface'][
            'grids']['cartesian']['resolution']['x']
        l_p_s = l_p * math.sqrt(vec_s_x * vec_s_x /
                                (height * height) + vec_s_y * vec_s_y /
                                (width * width))
        h.append(l_p_s)

        class_name = labels_data[idx].type
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    return tf.train.Example(features=tf.train.Features(
        feature={
            'id':
            dataset_util.bytes_feature(image_prefix.encode('utf8')),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'layers/height':
            dataset_util.int64_feature(height),
            'layers/width':
            dataset_util.int64_feature(width),
            'layers/detections/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix, 'detections_cartesian')),
            'layers/observations/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix, 'observations_cartesian')),
            'layers/decay_rate/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix, 'decay_rate_cartesian')),
            'layers/intensity/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix, 'intensity_cartesian')),
            'layers/zmin/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix,
                           'z_min_detections_cartesian')),
            'layers/zmax/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix,
                           'z_max_detections_cartesian')),
            'layers/occlusions/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix,
                           'z_max_occlusions_cartesian')),
            'layers_prev/detections/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prev_prefix,
                           'detections_cartesian')),
            'layers_prev/observations/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prev_prefix,
                           'observations_cartesian')),
            'layers_prev/decay_rate/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prev_prefix,
                           'decay_rate_cartesian')),
            'layers_prev/intensity/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prev_prefix,
                           'intensity_cartesian')),
            'layers_prev/zmin/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prev_prefix,
                           'z_min_detections_cartesian')),
            'layers_prev/zmax/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prev_prefix,
                           'z_max_detections_cartesian')),
            'layers_prev/occlusions/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prev_prefix,
                           'z_max_occlusions_cartesian')),
            'boxes/aligned/x_min':
            dataset_util.float_list_feature(xmin),
            'boxes/aligned/x_max':
            dataset_util.float_list_feature(xmax),
            'boxes/aligned/y_min':
            dataset_util.float_list_feature(ymin),
            'boxes/aligned/y_max':
            dataset_util.float_list_feature(ymax),
            'boxes/inclined/x_c':
            dataset_util.float_list_feature(x_c),
            'boxes/inclined/y_c':
            dataset_util.float_list_feature(y_c),
            'boxes/inclined/w':
            dataset_util.float_list_feature(w),
            'boxes/inclined/h':
            dataset_util.float_list_feature(h),
            'boxes/inclined/angle':
            dataset_util.float_list_feature(angle),
            'boxes/inclined/sin_angle':
            dataset_util.float_list_feature(sin_angle),
            'boxes/inclined/cos_angle':
            dataset_util.float_list_feature(cos_angle),
            'boxes/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'boxes/class/label':
            dataset_util.int64_list_feature(classes),
        }))
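_readImage is not shown here; a minimal sketch, assuming each grid-map layer is stored as a PNG named after its prefix and layer name (the naming scheme is an assumption):

import os

import tensorflow as tf

def _readImage(image_dir, prefix, layer_name):
    # Return the encoded PNG bytes of one grid-map layer.
    path = os.path.join(image_dir, prefix + '_' + layer_name + '.png')
    with tf.gfile.GFile(path, 'rb') as fid:
        return fid.read()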
Example #44
def CreateTFExample(img_path, img_name, annotation):
    """
    create tf record example
    this function runs once per image
    
    args:
        img_path: image path
        img_name: image name
        annotation: annotation dictionary for current image
    """
    #img_name=annotation['name'] # for viewnyx part 2

    with tf.gfile.GFile(os.path.join(img_path, img_name), 'rb') as fid:
        encoded_jpg = fid.read()

    img_format = img_name.split('.')[-1]
    width = annotation['width']
    height = annotation['height']

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for bbx in annotation['annotations']:
        xmins.append(bbx['x'] / width)
        xmaxs.append((bbx['x'] + bbx['width']) / width)
        ymins.append(bbx['y'] / height)
        ymaxs.append((bbx['y'] + bbx['height']) / height)
        classes_text.append(bbx['label'].lower().encode('utf8'))
        classes.append(GetClassID(bbx['label'].lower()))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(img_name.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(img_name.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(img_format.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
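A minimal driver sketch for CreateTFExample; `annotations` (a mapping from image names to annotation dicts) and `record_path` are assumptions, not from the source. tf.python_io.TFRecordWriter is the TF 1.x writer matching the tf.gfile usage above:

import tensorflow as tf

writer = tf.python_io.TFRecordWriter(record_path)  # record_path is hypothetical
for img_name, annotation in annotations.items():  # annotations is hypothetical
    tf_example = CreateTFExample(img_path, img_name, annotation)
    writer.write(tf_example.SerializeToString())
writer.close()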
Example #45
def create_tf_example(name):
    # TODO(user): Populate the following variables from your example.
    b_image = util.encode_image_png(
        os.path.join(source_img_dir, name) + '.' + fileformat)
    label_objects = util.parse_dota_poly(
        os.path.join(source_label_dir, name) + '.txt')

    width, height = Image.open(
        os.path.join(source_img_dir, name) + '.' +
        fileformat).size  # Image width, height
    filename = name.encode()  # Filename of the image. Empty if image is not from file
    encoded_image_data = b_image  # Encoded image bytes
    if fileformat == 'jpg':
        image_format = b'jpeg'
    else:
        image_format = b'png'  # b'jpeg' or b'png'

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    for obj in label_objects:
        poly = obj['poly']
        xmin, xmax, ymin, ymax = util.dots4ToRec4(poly)
        # Normalize the pixel coordinates by the image size, as the list
        # comments above require.
        xmins.append(xmin / width)
        xmaxs.append(xmax / width)
        ymins.append(ymin / height)
        ymaxs.append(ymax / height)
        classes_text.append(obj['name'].encode())
        classes.append(util.dota_10.index(obj['name']))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
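util.dots4ToRec4 comes from the DOTA devkit; judging by how its outputs are used above, it reduces a 4-point polygon to its axis-aligned enclosing box. A hedged equivalent, for reference only:

def poly_to_rec4(poly):
    # poly is a list of (x, y) vertices; the enclosing axis-aligned box is
    # the min/max of the coordinates, returned as (xmin, xmax, ymin, ymax).
    xs = [p[0] for p in poly]
    ys = [p[1] for p in poly]
    return min(xs), max(xs), min(ys), max(ys)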
def dict_to_tf_example(data, image_dir, label_map_dict):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding
    box coordinates provided by the raw data.

    Arguments:
        data: dict holding XML fields for a single image (obtained by
          running dataset_util.recursive_parse_xml_to_dict)
        image_dir: Path to image directory.
        label_map_dict: A map from string label names to integer ids.

    Returns:
        example: The converted tf.Example.
    """
    full_path = os.path.join(image_dir, data['filename'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    try:
        for obj in data['object']:
            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
    except KeyError:
        print(data['filename'] + ' without objects!')

    difficult_obj = [0] * len(classes)
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj)
        }))
    return example
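A hedged sketch of producing the `data` dict this function expects, mirroring the XML handling used elsewhere in these examples; `annotation_path` is an assumption, and `image_dir`/`label_map_dict` reuse the function's own parameters:

from lxml import etree
import tensorflow as tf

with tf.gfile.GFile(annotation_path, 'r') as fid:  # annotation_path is hypothetical
    xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
example = dict_to_tf_example(data, image_dir, label_map_dict)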
Example #47
def create_tf_example(example):

    image_path = os.getcwd() + '/' + FLAGS.images_dir + example
    labels_path = os.getcwd() + '/' + FLAGS.labels_dir + os.path.splitext(
        example)[0] + '.xml'

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_img = fid.read()
    encoded_io = io.BytesIO(encoded_img)
    image = PIL.Image.open(encoded_io)

    key = hashlib.sha256(encoded_img).hexdigest()

    with tf.gfile.GFile(labels_path, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    image_format = 'png'

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []

    # Collect one normalized bounding box per annotated object.
    for obj in data['object']:
        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])

        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)

    # Every object in this dataset carries the single class 'target'
    # (id 1); keep one class entry per box so the lists stay aligned.
    classes_text = ['target'.encode('utf8')] * len(xmins)
    classes = [1] * len(xmins)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_img),
            'image/format':
            dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Example #48
def dict_to_tf_example(data, label_map_dict, img_path):

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = image.width
    height = image.height

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []

    for shape in data['Layers']['Layer']['Shapes']['Shape']:

        text = shape['BlockText']['Text'].text
        if not (text.startswith('Panel') or text.startswith('panel')):
            continue

        attrib = shape['Data']['Extent'].attrib
        x = float(attrib['X'])
        y = float(attrib['Y'])
        w = float(attrib['Width'])
        h = float(attrib['Height'])

        xmin = x
        xmax = x + w
        ymin = y
        ymax = y + h

        xmin /= width
        ymin /= height
        xmax /= width
        ymax /= height

        if xmin < 0 or ymin < 0 or xmax > 1.01 or ymax > 1.01:
            print(img_path)

        xmins.append(xmin)
        ymins.append(ymin)
        xmaxs.append(xmax)
        ymaxs.append(ymax)

        class_name = 'Panel'
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       categories,
                       image_subdirectory='JPEGImages',
                       flip=False,
                       eval=False):

    full_path = get_image_full_path(dataset_directory, image_subdirectory, data['filename'])
    if eval:
        cop = 'data/inference/' + dataset_directory.split('/')[-2] + '-' + data['filename'] + '.' + full_path.split('.')[-1]
        copyfile(full_path, cop)
    encoded_jpg = preprocess_image(full_path, horizontal_flip=flip)

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    if 'object' not in data:
        print('No objects found in the XML annotation')
    else:
        for obj in data['object']:
            if obj['name'] in categories:
                if flip:
                    print('flip')
                    c = str(get_horizontal_flipped_index(int(obj['name'])))
                    xmin.append(1.0 - float(obj['bndbox']['xmax']) / width)
                    ymin.append(float(obj['bndbox']['ymin']) / height)
                    xmax.append(1.0 - float(obj['bndbox']['xmin']) / width)
                    ymax.append(float(obj['bndbox']['ymax']) / height)
                    classes_text.append(c.encode('utf8'))
                    classes.append(label_map_dict[c])
                else:
                    c = obj['name']
                    xmin.append(float(obj['bndbox']['xmin']) / width)
                    ymin.append(float(obj['bndbox']['ymin']) / height)
                    xmax.append(float(obj['bndbox']['xmax']) / width)
                    ymax.append(float(obj['bndbox']['ymax']) / height)
                    if max(xmin[-1], ymin[-1], xmax[-1], ymax[-1]) > 1.0:
                        raise ValueError('Normalized box coordinate exceeds '
                                         '1.0 in ' + data['filename'])
                    classes_text.append(c.encode('utf8'))
                    classes.append(label_map_dict[c])

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return example
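The flip branch above relies on the identity that mirroring a box horizontally reflects and swaps its x extremes; a minimal self-check of that identity (names are illustrative, not from the source):

def flip_box_x(xmin_norm, xmax_norm):
    # Horizontal mirror in normalized coordinates: new_xmin = 1 - old_xmax and
    # new_xmax = 1 - old_xmin, which preserves new_xmin <= new_xmax.
    return 1.0 - xmax_norm, 1.0 - xmin_norm

assert flip_box_x(0.2, 0.5) == (0.5, 0.8)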
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_mask_png)
  mask = PIL.Image.open(encoded_png_io)
  if mask.format != 'PNG':
    raise ValueError('Mask format not PNG')

  mask_np = np.asarray(mask)
  nonbackground_indices_x = np.any(mask_np != 2, axis=0)
  nonbackground_indices_y = np.any(mask_np != 2, axis=1)
  nonzero_x_indices = np.where(nonbackground_indices_x)
  nonzero_y_indices = np.where(nonbackground_indices_y)

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  masks = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue
    difficult_obj.append(int(difficult))

    if faces_only:
      xmin = float(obj['bndbox']['xmin'])
      xmax = float(obj['bndbox']['xmax'])
      ymin = float(obj['bndbox']['ymin'])
      ymax = float(obj['bndbox']['ymax'])
    else:
      xmin = float(np.min(nonzero_x_indices))
      xmax = float(np.max(nonzero_x_indices))
      ymin = float(np.min(nonzero_y_indices))
      ymax = float(np.max(nonzero_y_indices))

    xmins.append(xmin / width)
    ymins.append(ymin / height)
    xmaxs.append(xmax / width)
    ymaxs.append(ymax / height)
    class_name = get_class_name_from_filename(data['filename'])
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))
    if not faces_only:
      mask_remapped = (mask_np != 2).astype(np.uint8)
      masks.append(mask_remapped)

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }
  if not faces_only:
    if mask_type == 'numerical':
      mask_stack = np.stack(masks).astype(np.float32)
      masks_flattened = np.reshape(mask_stack, [-1])
      feature_dict['image/object/mask'] = (
          dataset_util.float_list_feature(masks_flattened.tolist()))
    elif mask_type == 'png':
      encoded_mask_png_list = []
      for mask in masks:
        img = PIL.Image.fromarray(mask)
        output = io.BytesIO()
        img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())
      feature_dict['image/object/mask'] = (
          dataset_util.bytes_list_feature(encoded_mask_png_list))

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
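A hedged sketch of the inverse of the mask_type='png' branch above, reading one encoded instance mask back into a numpy array:

import io
import numpy as np
import PIL.Image

def decode_mask_png(encoded_mask_png):
    # Bytes -> PIL image -> uint8 array of shape [height, width].
    return np.asarray(PIL.Image.open(io.BytesIO(encoded_mask_png)))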
Example #51
def create_tf_detection_example(group, path):
    try:
        with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
            encoded_jpg = fid.read()
    except Exception:
        traceback.print_exc()
        print('error in opening: ' + os.path.join(path, '{}'.format(group.filename)))
        return None
    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    # im = Image.open(os.path.join(path, '{}'.format(group.filename)))
    for index, row in group.object.iterrows():
        # if row['class'] == 2:
        #     continue
        if max(int(row['lt']), int(row['lb'])) >= int(row['width']) or max(int(row['tl']), int(row['tr'])) >= int(
                row['height']) or min(int(row['rt']), int(row['rb'])) <= 0 or min(int(row['br']), int(row['bl'])) <= 0:
            continue
        xmin = min(float(row['lt']), float(row['lb']))
        xmax = max(float(row['rt']), float(row['rb']))
        ymin = min(float(row['tl']), float(row['tr']))
        ymax = max(float(row['bl']), float(row['br']))

        xmin += -6  # random.randint(-5, 2)
        xmax += 6  # random.randint(2, 5)
        ymin += -5  # random.randint(-5, 2)
        ymax += 5  # random.randint(2, 5)

        xmin = max(0, xmin)
        xmax = min(xmax, row['width'])
        ymin = max(0, ymin)
        ymax = min(ymax, row['height'])
        # d_im = im.crop((xmin, ymin, xmax, ymax))
        # d_im.show("")
        # return None
        xmins.append(xmin / row['width'])
        xmaxs.append(xmax / row['width'])
        ymins.append(ymin / row['height'])
        ymaxs.append(ymax / row['height'])
        classes_text.append(class_int_to_text(row['class']))
        classes.append(class_int_to_int(row['class']))

    if len(xmins) == 0:
        return None

    tf_example_detection = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(row['height']),
        'image/width': dataset_util.int64_feature(row['width']),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example_detection
Example #52
def _create_tf_example(input: pd.Series) -> tf.train.Example:
    """Converts one annotation row into a tf.train.Example.

    This produces one record per call, so converting a whole dataset means
    looping and calling this method on each example.

    Args:
        input: A pandas Series holding everything needed to build the record
          (file name, image size, box coordinates, and class); it is not the
          JPEG itself.

    Returns:
        A tf.train.Example, which is typically serialized to a string and
        written to a file in TFRecord format by a TFRecord writer.
    """
    height = input[csv_column_names[2]]  # Image height
    width = input[csv_column_names[1]]  # Image width
    # NOTE: the string must be encoded to bytes here; the example on GitHub
    # leaves it as str, which raises a TypeError in the Example()
    # initialization below.
    filename = str.encode(
        input[csv_column_names[0]]
    )  # Filename of the image (not the whole path). Empty if image is not from file.
    # NOTE: gfile is mostly a wrapper around Python's open() filesystem API,
    # but it can also open non-local files (e.g. on Google Cloud Storage or HDFS).
    # see: https://stackoverflow.com/questions/42256938/what-does-tf-gfile-do-in-tensorflow?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
    with tf.gfile.GFile(input[path_column], 'rb') as fid:
        # this jpg is in bytes format which is used when creating an Example object
        encoded_jpg = fid.read()
    # NOTE: unlike the raccoon dataset example on GitHub, we do not reopen the
    # image to read its width and height; that information was already captured
    # when converting to XML, so reopening would be duplicative.
    image_format = b'jpg'  # b'jpeg' or b'png' - assume jpg files

    # The lists below each hold exactly one element. See the example at:
    # https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md
    xmins = [input[csv_column_names[4]] / width]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [input[csv_column_names[6]] / width]  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = [input[csv_column_names[5]] / height]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [input[csv_column_names[7]] / height]  # List of normalized bottom y coordinates in bounding box (1 per box)
    # NOTE: the class string must also be encoded to bytes, or the Example()
    # initialization below raises a TypeError.
    classes_text = [str.encode(input[csv_column_names[3]])]  # List of string class name of bounding box (1 per box)
    classes = [label_map[input[csv_column_names[3]]]]  # List of integer class id of bounding box (1 per box)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
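A hedged driver for the function above, looping over a DataFrame with one row per example as the docstring prescribes; `df` and `output_path` are assumptions:

import tensorflow as tf

with tf.python_io.TFRecordWriter(output_path) as writer:  # output_path is hypothetical
    for _, row in df.iterrows():  # df is hypothetical, one row per example
        writer.write(_create_tf_example(row).SerializeToString())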
def _create_tf_example(row, img_input):
    if _is_benign(row):
        folder_name = 'benign'
    elif _is_cancer(row):
        folder_name = 'cancer'
    else:
        raise InvalidFileNameError("Invalid Filename")
    full_path = os.path.join(img_input, folder_name,
                             '{}'.format(row['filename']))
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = row['filename'].encode('utf8')
    channels = row['channels']
    shape = [int(height), int(width), int(channels)]
    image_format = b'jpg'
    xmins = [row['xmin'] / width]
    xmaxs = [row['xmax'] / width]
    ymins = [row['ymin'] / height]
    ymaxs = [row['ymax'] / height]
    classes_text = [row['class'].encode('utf8')]
    classes = [_class_text_to_int(row['class'])]
    difficult = [0]
    truncated = [0]

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/channels':
            dataset_util.int64_feature(channels),
            'image/shape':
            dataset_util.int64_list_feature(shape),
            'image/class':
            dataset_util.int64_list_feature(classes),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/bbox/label':
            dataset_util.int64_list_feature(classes),
            'image/object/bbox/label_text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/bbox/difficult':
            dataset_util.int64_list_feature(difficult),
            'image/object/bbox/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
        }))
    return tf_example
def create_tf_example(example):

    height = 660  # Image height
    width = 512  # Image width

    filename = example[0]['image']['original_filename']  # Filename of the image. Empty if image is not from file
    filename = filename.encode()

    with tf.gfile.GFile(example[0]['image']['original_filename'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode()

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    for box in example[1]['boxes']:

        # Convert to float before dividing so integer division cannot
        # truncate the normalized coordinates under Python 2.
        xmins.append(float(box['x_min']) / width)
        xmaxs.append(float(box['x_max']) / width)
        ymins.append(float(box['y_min']) / height)
        ymaxs.append(float(box['y_max']) / height)
        classes_text.append(box['label_name'].encode())
        classes.append(int(LABEL_DICT[box['label_id']]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
def prepare_example(image_path, annotations, label_map_dict):
  """Converts a dictionary with annotations for an image to tf.Example proto.

  Args:
    image_path: The complete path to image.
    annotations: A dictionary representing the annotation of a single object
      that appears in the image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
  with tf.gfile.GFile(image_path, 'rb') as fid:
    encoded_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_png)
  image = pil.open(encoded_png_io)
  image = np.asarray(image)

  key = hashlib.sha256(encoded_png).hexdigest()

  width = int(image.shape[1])
  height = int(image.shape[0])

  xmin_norm = annotations['2d_bbox_left'] / float(width)
  ymin_norm = annotations['2d_bbox_top'] / float(height)
  xmax_norm = annotations['2d_bbox_right'] / float(width)
  ymax_norm = annotations['2d_bbox_bottom'] / float(height)

  difficult_obj = [0]*len(xmin_norm)

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_png),
      'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
      'image/object/class/text': dataset_util.bytes_list_feature(
          [x.encode('utf8') for x in annotations['type']]),
      'image/object/class/label': dataset_util.int64_list_feature(
          [label_map_dict[x] for x in annotations['type']]),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.float_list_feature(
          annotations['truncated']),
      'image/object/alpha': dataset_util.float_list_feature(
          annotations['alpha']),
      'image/object/3d_bbox/height': dataset_util.float_list_feature(
          annotations['3d_bbox_height']),
      'image/object/3d_bbox/width': dataset_util.float_list_feature(
          annotations['3d_bbox_width']),
      'image/object/3d_bbox/length': dataset_util.float_list_feature(
          annotations['3d_bbox_length']),
      'image/object/3d_bbox/x': dataset_util.float_list_feature(
          annotations['3d_bbox_x']),
      'image/object/3d_bbox/y': dataset_util.float_list_feature(
          annotations['3d_bbox_y']),
      'image/object/3d_bbox/z': dataset_util.float_list_feature(
          annotations['3d_bbox_z']),
      'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
          annotations['3d_bbox_rot_y']),
  }))

  return example
Example #56
def create_tf_example(image_path,
                      image,
                      annotations_list,
                      category_index,
                      include_masks=False):
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()
    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for idx, object_annotations in enumerate(annotations_list):
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])
        if include_masks:
            segm = object_annotations['segmentation']
            if isinstance(segm, list):
                rles = mask.frPyObjects(segm, image_height, image_width)
                rle = mask.merge(rles)
                m = mask.decode(rle)
            else:
                m = mask.decode(segm)
            pil_image = PIL.Image.fromarray(m)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
            if DUMP_MASK_IMAGES:
                m[m > 0] = 255
                pil_image = PIL.Image.fromarray(m)
                save_path = (FLAGS.output_dir + '/' + filename.split('.')[0] +
                             '_mask_' + str(idx) + '.png')
                pil_image.save(save_path)
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def create_tf_record(output_filename, num_shards, examples):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack,
            output_filename,
            num_shards)
        for idx, example in enumerate(examples):
            img_path = os.path.join(read_bucket, example)
            if not os.path.isfile(img_path):
                continue
            with tf.gfile.GFile(img_path, 'rb') as fid:
                encoded_jpg = fid.read()
            encoded_jpg_io = io.BytesIO(encoded_jpg)
            image = PIL.Image.open(encoded_jpg_io)
            if image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            key = hashlib.sha256(encoded_jpg).hexdigest()

            width, height = image.size

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []  # 'coke', 'pepsi', 'coke'...
            classes = []  # 1, 2, 1...
            difficult_obj = []
            truncated = []
            poses = []

            for annotation in annotations[example]:
                xmins.append(annotation['x'])
                xmaxs.append(annotation['x2'])
                ymins.append(annotation['y'])
                ymaxs.append(annotation['y2'])
                classes_text.append(annotation['label'].encode('utf8'))
                classes.append(1)  # temporary, I need to assign labels to actual ids
                difficult_obj.append(0)
                truncated.append(0)
                poses.append(''.encode('utf8'))

            try:
                feature_dict = {
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/source_id': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label': dataset_util.int64_list_feature(classes),
                    'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                    'image/object/truncated': dataset_util.int64_list_feature(truncated),
                    'image/object/view': dataset_util.bytes_list_feature(poses)
                }
                tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                print('Invalid example, ignoring.')
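A hedged invocation sketch for the sharded writer above; open_sharded_output_tfrecords names each shard with an index-of-total suffix, and `output_dir`/`train_examples` are assumptions:

# Produces output_dir/train.record-00000-of-00010 ... -00009-of-00010.
create_tf_record(os.path.join(output_dir, 'train.record'),
                 num_shards=10,
                 examples=train_examples)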
Example #58
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
  """Populates a TF Example message with image annotations from a data frame.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image.
    label_map: String to integer label map.
    encoded_image: The encoded image string

  Returns:
    The populated TF Example, if the label of at least one object is present in
    label_map. Otherwise, returns None.
  """

  filtered_data_frame = annotations_data_frame[
      annotations_data_frame.LabelName.isin(label_map)]

  image_id = annotations_data_frame.ImageID.iloc[0]

  feature_map = {
      standard_fields.TfExampleFields.object_bbox_ymin:
          dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_xmin:
          dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_ymax:
          dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_xmax:
          dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
      standard_fields.TfExampleFields.object_class_text:
          dataset_util.bytes_list_feature(
              filtered_data_frame.LabelName.as_matrix()),
      standard_fields.TfExampleFields.object_class_label:
          dataset_util.int64_list_feature(
              filtered_data_frame.LabelName.map(lambda x: label_map[x])
              .as_matrix()),
      standard_fields.TfExampleFields.filename:
          dataset_util.bytes_feature('{}.jpg'.format(image_id)),
      standard_fields.TfExampleFields.source_id:
          dataset_util.bytes_feature(image_id),
      standard_fields.TfExampleFields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  if 'IsGroupOf' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_group_of] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsGroupOf.as_matrix().astype(int))
  if 'IsOccluded' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_occluded] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsOccluded.as_matrix().astype(int))
  if 'IsTruncated' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_truncated] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsTruncated.as_matrix().astype(int))
  if 'IsDepiction' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_depiction] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsDepiction.as_matrix().astype(int))

  return tf.train.Example(features=tf.train.Features(feature=feature_map))
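A hedged sketch of feeding this function, grouping a box-per-row annotations CSV by image; `annotations_csv`, `image_dir`, `label_map`, and `output_path` are assumptions:

import os
import pandas as pd
import tensorflow as tf

all_annotations = pd.read_csv(annotations_csv)  # annotations_csv is hypothetical
writer = tf.python_io.TFRecordWriter(output_path)  # output_path is hypothetical
for image_id, image_df in all_annotations.groupby('ImageID'):
    with tf.gfile.GFile(os.path.join(image_dir, image_id + '.jpg'), 'rb') as fid:
        encoded_image = fid.read()
    tf_example = tf_example_from_annotations_data_frame(
        image_df, label_map, encoded_image)
    if tf_example is not None:
        writer.write(tf_example.SerializeToString())
writer.close()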
Example #59
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.
  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.
  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
  Returns:
    example: The converted tf.Example.
  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue
        difficult_obj.append(int(difficult))

        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])

        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)
        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    feature_dict = {
        'image/height':
        dataset_util.int64_feature(height),
        'image/width':
        dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated),
        'image/object/view':
        dataset_util.bytes_list_feature(poses),
    }

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
Example #60
def create_tf_example(group, path):
    try:
        with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                            'rb') as fid:
            encoded_jpg = fid.read()
        encoded_jpg_io = io.BytesIO(encoded_jpg)
    except Exception:
        print("no existing file:",
              os.path.join(path, '{}'.format(group.filename)))
        return
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmin = row['xmin'] / width
        xmax = row['xmax'] / width
        ymin = row['ymin'] / height
        ymax = row['ymax'] / height
        # Order each pair so min <= max, then clip the normalized
        # coordinates to the valid [0, 1] range.
        xmin, xmax = min(xmin, xmax), max(xmin, xmax)
        ymin, ymax = min(ymin, ymax), max(ymin, ymax)
        xmin = min(max(xmin, 0.0), 1.0)
        xmax = min(max(xmax, 0.0), 1.0)
        ymin = min(max(ymin, 0.0), 1.0)
        ymax = min(max(ymax, 0.0), 1.0)

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example