Example no. 1
def create_tf_example(example, writer):
  height = example['height']
  width = example['width']
  filename = example['filename']
  encoded_image_data = example['encoded_image_data']
  image_format = example['image_format']

  bboxes = example['bbox']
  xmins = [bbox[0]/float(width) for bbox in bboxes] # List of normalized left x coordinates in bounding box (1 per box)
  xmaxs = [bbox[2]/float(width) for bbox in bboxes] # List of normalized right x coordinates in bounding box
  ymins = [bbox[1]/float(height) for bbox in bboxes] # List of normalized top y coordinates in bounding box (1 per box)
  ymaxs = [bbox[3]/float(height) for bbox in bboxes] # List of normalized bottom y coordinates in bounding box
  classes_text = example['class_text']
  classes = example['class_idx']

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
      'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature([bytes(t, "utf-8") for t in classes_text]),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  writer.write(tf_example.SerializeToString())
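
A minimal driver loop for the writer-based function above might look like the following sketch; the `examples` list and the output path are assumptions for illustration, not part of the original script.

# Hypothetical driver (assumed names): serialize a list of parsed annotation
# dicts, each with the keys read above, into one TFRecord file.
writer = tf.python_io.TFRecordWriter('train.record')  # output path assumed
for example in examples:
    create_tf_example(example, writer)
writer.close()
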
def create_tf_example(row):
    full_path = os.path.join(os.getcwd(), 'images', '{}'.format(row['filename']))
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = row['filename'].encode('utf8')
    image_format = b'jpg'
    xmins = [row['xmin'] / width]
    xmaxs = [row['xmax'] / width]
    ymins = [row['ymin'] / height]
    ymaxs = [row['ymax'] / height]
    classes_text = [row['class'].encode('utf8')]
    classes = [class_text_to_int(row['class'])]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
  def testDecodeObjectGroupOf(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    object_group_of = [0, 1]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/group_of':
                    dataset_util.int64_list_feature(object_group_of),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual(
        (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
         .as_list()), [2])
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(
        [bool(item) for item in object_group_of],
        tensor_dict[fields.InputDataFields.groundtruth_group_of])
  def testDecodeInstanceSegmentation(self):
    num_instances = 4
    image_height = 5
    image_width = 3

    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)

    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(2, size=(num_instances, image_height,
                                   image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
                    dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual(
        (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
         .get_shape().as_list()), [4, 5, 3])

    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
                         .get_shape().as_list()), [4])

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(
        instance_masks.astype(np.float32),
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
    self.assertAllEqual(object_classes,
                        tensor_dict[fields.InputDataFields.groundtruth_classes])
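
The test above depends on masks surviving a flatten/reshape round trip: they are serialized as a flat float list and the decoder restores the [num_instances, height, width] shape. A standalone sketch of that round trip:

# Sketch: the flattening used for 'image/object/mask' is lossless as long as
# the decoder knows num_instances, height and width.
import numpy as np

masks = np.random.randint(2, size=(4, 5, 3)).astype(np.float32)
flat = np.reshape(masks, [-1])           # what float_list_feature receives
restored = np.reshape(flat, (4, 5, 3))   # what the decoder reconstructs
assert np.array_equal(masks, restored)
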
def dict_to_tf_example(data,
                       label_map_dict,
                       data_dir):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    """
  
    img_path = os.path.join(data_dir, data.replace("mask", "images"))
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = 512
    height = 512

    classes = []
    classes_text = []
    encoded_mask_png_list = []
    # Encode the binary mask as PNG bytes (a numpy array has no .save method).
    mask_png = cv2.imread(os.path.join(data_dir, data), 0) / 255
    mask_img = PIL.Image.fromarray(mask_png.astype('uint8'))
    output = io.BytesIO()
    mask_img.save(output, format='PNG')
    encoded_mask_png_list.append(output.getvalue())
    class_name = 'water'
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])


    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            data.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }

    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
  def testDecodeImageLabels(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/class/label': dataset_util.int64_list_feature([1, 2]),
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)
    self.assertTrue(
        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
    self.assertAllEqual(
        tensor_dict[fields.InputDataFields.groundtruth_image_classes],
        np.array([1, 2]))
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/class/text':
                    dataset_util.bytes_list_feature(
                        ['dog'.encode('utf8'), 'cat'.encode('utf8')]),
            })).SerializeToString()
    label_map_string = """
      item {
        id:3
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'w') as f:
      f.write(label_map_string)
    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      tensor_dict = sess.run(tensor_dict)
    self.assertTrue(
        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
    self.assertAllEqual(
        tensor_dict[fields.InputDataFields.groundtruth_image_classes],
        np.array([1, 3]))
def createTfExample(singleFileData, path):
    # use TensorFlow's GFile function to open the .jpg image matching the current box data
    with tf.gfile.GFile(os.path.join(path, '{}'.format(singleFileData.filename)), 'rb') as tensorFlowImageFile:
        tensorFlowImage = tensorFlowImageFile.read()
    # end with

    # get the image width and height via converting from a TensorFlow image to an io library BytesIO image,
    # then to a PIL Image, then breaking out the width and height
    bytesIoImage = io.BytesIO(tensorFlowImage)
    pilImage = Image.open(bytesIoImage)
    width, height = pilImage.size

    # get the file name from the file data passed in, and set the image format to .jpg
    fileName = singleFileData.filename.encode('utf8')
    imageFormat = b'jpg'

    # declare empty lists for the box x, y, mins and maxes, and the class as text and as an integer
    xMins = []
    xMaxs = []
    yMins = []
    yMaxs = []
    classesAsText = []
    classesAsInts = []

    # for each row in the current .xml file's data . . . (each row in the .xml file corresponds to one box)
    for index, row in singleFileData.object.iterrows():
        xMins.append(row['xmin'] / width)
        xMaxs.append(row['xmax'] / width)
        yMins.append(row['ymin'] / height)
        yMaxs.append(row['ymax'] / height)
        classesAsText.append(row['class'].encode('utf8'))
        classesAsInts.append(classAsTextToClassAsInt(row['class']))
    # end for

    # finally we can calculate and return the TensorFlow Example
    tfExample = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fileName),
        'image/source_id': dataset_util.bytes_feature(fileName),
        'image/encoded': dataset_util.bytes_feature(tensorFlowImage),
        'image/format': dataset_util.bytes_feature(imageFormat),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xMins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xMaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(yMins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(yMaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classesAsText),
        'image/object/class/label': dataset_util.int64_list_feature(classesAsInts)}))

    return tfExample
def create_tf_example(filename, writer):
    with open(filename) as f:
        lines = f.readlines()
    image_filename = lines[0].strip()[1:]
    classes_text = []
    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    im = Image.open(image_filename)
    arr = io.BytesIO()
    im.save(arr, format='PNG')
    height = im.height
    width = im.width
    encoded_image_data = arr.getvalue()
    image_format = 'png'
    for line in lines[1:]:
        line = line.strip()
        if line == '':
            continue
        data = line.split(",")
        bbox = list(map(int, map(float, data[:4])))
        class_text = data[4].strip()
        class_idx = labels.index(class_text)
        classes_text.append(class_text)
        classes.append(class_idx)
        xmins.append(bbox[0]/float(width))
        xmaxs.append(bbox[2]/float(width)) # List of normalized right x coordinates in bounding box
        ymins.append(bbox[1]/float(height)) # List of normalized top y coordinates in bounding box (1 per box)
        ymaxs.append(bbox[3]/float(height)) # List of normalized bottom y coordinates in bounding box


    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature([bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
  def testDecodeObjectLabelWithText(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    bbox_classes_text = ['cat'.encode('utf8'), 'dog'.encode('utf8')]
    # Annotation label gets overridden by labelmap id.
    annotated_bbox_classes = [3, 4]
    expected_bbox_classes = [1, 2]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
                'image/object/class/label':
                    dataset_util.int64_list_feature(annotated_bbox_classes),
            })).SerializeToString()
    label_map_string = """
      item {
        id:1
        name:'cat'
      }
      item {
        id:2
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'w') as f:
      f.write(label_map_string)

    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    init = tf.tables_initializer()
    with self.test_session() as sess:
      sess.run(init)
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(expected_bbox_classes,
                        tensor_dict[fields.InputDataFields.groundtruth_classes])
  def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    bbox_classes_text = ['cat'.encode('utf8'), 'cheetah'.encode('utf8')]
    bbox_classes_id = [5, 6]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
                'image/object/class/label':
                    dataset_util.int64_list_feature(bbox_classes_id),
            })).SerializeToString()

    label_map_string = """
      item {
        name:'/m/cat'
        id:3
        display_name:'cat'
      }
      item {
        name:'/m/dog'
        id:1
        display_name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'w') as f:
      f.write(label_map_string)
    example_decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual([3, -1],
                        tensor_dict[fields.InputDataFields.groundtruth_classes])
def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
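
The `group` argument above (an object with a `.filename` attribute and an `.object` data frame of box rows) is commonly produced by grouping a per-box CSV by filename. A sketch of such a helper, modeled on widespread conversion scripts; the CSV path and column names are assumptions:

# Assumed companion helper: group per-box CSV rows into one object per image.
import pandas as pd
from collections import namedtuple

def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    return [data(filename, grouped.get_group(filename))
            for filename in grouped.groups]

examples = pd.read_csv('train_labels.csv')  # filename, class, xmin, ymin, ...
for group in split(examples, 'filename'):
    tf_example = create_tf_example(group, 'images')
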
Example no. 12
  def testInstancesNotAvailableByDefault(self):
    num_instances = 4
    image_height = 5
    image_width = 3
    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)

    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(2, size=(num_instances, image_height,
                                   image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
                    dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    self.assertTrue(
        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
  def create_tf_record(self):
    path = os.path.join(self.get_temp_dir(), 'tfrecord')
    writer = tf.python_io.TFRecordWriter(path)

    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    flat_mask = (4 * 5) * [1.0]
    with self.test_session():
      encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/height': dataset_util.int64_feature(4),
        'image/width': dataset_util.int64_feature(5),
        'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
        'image/object/class/label': dataset_util.int64_list_feature([2]),
        'image/object/mask': dataset_util.float_list_feature(flat_mask),
    }))
    writer.write(example.SerializeToString())
    writer.close()

    return path
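
To verify what the helper wrote, the record can be read back with the TF 1.x record iterator; this check is a sketch and not part of the original test class.

# Sketch: parse the serialized Example back out of the TFRecord file.
for record in tf.python_io.tf_record_iterator(path):
    parsed = tf.train.Example()
    parsed.ParseFromString(record)
    print(parsed.features.feature['image/height'].int64_list.value)  # [4]
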
def dict_to_tf_example(split_data_dir,
                       name,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert Munich datset to tf.Example proto.
    Convert the image "name" of the Munich dataset (train/val) into record file
    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    IMG_TYPE = '.jpg'
    GT_EXT = '_gt.txt'
    file_name = name + IMG_TYPE
    img_path = os.path.join(split_data_dir, file_name)
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()
    # Get the image shape information
    tmp_img = cv2.imread(img_path)
    height, width, depth = tmp_img.shape

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    # Get the ground truth bounding box file
    gt_filename = os.path.join(split_data_dir, name + GT_EXT)
    with open(gt_filename) as f:
        lines = f.read().splitlines()
    for line in lines:
        difficult_obj.append(int(0))
        line_float = [float(item) for item in line.split(',')]
        label = int(line_float[-1])
        xmin.append(line_float[0] / width)  # xmin
        ymin.append(line_float[1] / height)  # ymin
        xmax.append(line_float[2] / width)  # xmax
        ymax.append(line_float[3] / height)  # ymax
        classes_text.append(OBJ_NAME[label].encode('utf8'))
        classes.append(USE_LABEL[label])
        # Add zeros for truncated (not being used for Munich dataset)
        truncated.append(0)
        poses.append('none'.encode('utf8'))
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(file_name.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(file_name.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Example no. 15
def createTfExample(singleFileData, path):
    # use TensorFlow's GFile function to open the .jpg image matching the current box data
    with tf.gfile.GFile(
            os.path.join(path, '{}'.format(singleFileData.filename)),
            'rb') as tensorFlowImageFile:
        tensorFlowImage = tensorFlowImageFile.read()
    # end with

    # get the image width and height via converting from a TensorFlow image to an io library BytesIO image,
    # then to a PIL Image, then breaking out the width and height
    bytesIoImage = io.BytesIO(tensorFlowImage)
    pilImage = Image.open(bytesIoImage)
    width, height = pilImage.size

    # get the file name from the file data passed in, and set the image format to .jpg
    fileName = singleFileData.filename.encode('utf8')
    imageFormat = b'jpg'

    # declare empty lists for the box x, y, mins and maxes, and the class as text and as an integer
    xMins = []
    xMaxs = []
    yMins = []
    yMaxs = []
    classesAsText = []
    classesAsInts = []

    # for each row in the current .xml file's data . . . (each row in the .xml file corresponds to one box)
    for index, row in singleFileData.object.iterrows():
        xMins.append(row['xmin'] / width)
        xMaxs.append(row['xmax'] / width)
        yMins.append(row['ymin'] / height)
        yMaxs.append(row['ymax'] / height)
        classesAsText.append(row['class'].encode('utf8'))
        classesAsInts.append(classAsTextToClassAsInt(row['class']))
    # end for

    # finally we can calculate and return the TensorFlow Example
    tfExample = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(fileName),
            'image/source_id':
            dataset_util.bytes_feature(fileName),
            'image/encoded':
            dataset_util.bytes_feature(tensorFlowImage),
            'image/format':
            dataset_util.bytes_feature(imageFormat),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xMins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xMaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(yMins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(yMaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classesAsText),
            'image/object/class/label':
            dataset_util.int64_list_feature(classesAsInts)
        }))

    return tfExample
Example no. 16
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # with tf.gfile.GFile(mask_path, 'rb') as fid:
    #   encoded_mask_png = fid.read()
    # encoded_png_io = io.BytesIO(encoded_mask_png)
    # mask = PIL.Image.open(encoded_png_io)
    # if mask.format != 'PNG':
    #   raise ValueError('Mask format not PNG')

    # mask_np = np.asarray(mask)
    # nonbackground_indices_x = np.any(mask_np != 2, axis=0)
    # nonbackground_indices_y = np.any(mask_np != 2, axis=1)
    # nonzero_x_indices = np.where(nonbackground_indices_x)
    # nonzero_y_indices = np.where(nonbackground_indices_y)

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    masks = []
    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue
            difficult_obj.append(int(difficult))

            # if faces_only:
            if 1:
                xmin = float(obj['bndbox']['xmin'])
                xmax = float(obj['bndbox']['xmax'])
                ymin = float(obj['bndbox']['ymin'])
                ymax = float(obj['bndbox']['ymax'])
            # else:
            #   xmin = float(np.min(nonzero_x_indices))
            #   xmax = float(np.max(nonzero_x_indices))
            #   ymin = float(np.min(nonzero_y_indices))
            #   ymax = float(np.max(nonzero_y_indices))

            xmins.append(xmin / width)
            ymins.append(ymin / height)
            xmaxs.append(xmax / width)
            ymaxs.append(ymax / height)
            # class_name = get_class_name_from_filename(data['filename'])
            class_name = obj['name']
            classes_text.append(class_name.encode('utf8'))
            classes.append(label_map_dict[class_name])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))
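            # NOTE: mask_np below is produced by the mask-reading block
            # commented out above; re-enable it before running with
            # faces_only=False, or this raises a NameError.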
            if not faces_only:
                mask_remapped = (mask_np != 2).astype(np.uint8)
                masks.append(mask_remapped)

    feature_dict = {
        'image/height':
        dataset_util.int64_feature(height),
        'image/width':
        dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated),
        'image/object/view':
        dataset_util.bytes_list_feature(poses),
    }
    if not faces_only:
        if mask_type == 'numerical':
            mask_stack = np.stack(masks).astype(np.float32)
            masks_flattened = np.reshape(mask_stack, [-1])
            feature_dict['image/object/mask'] = (
                dataset_util.float_list_feature(masks_flattened.tolist()))
        elif mask_type == 'png':
            encoded_mask_png_list = []
            for mask in masks:
                img = PIL.Image.fromarray(mask)
                output = io.BytesIO()
                img.save(output, format='PNG')
                encoded_mask_png_list.append(output.getvalue())
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
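
The docstring's claim that 'png' masks give smaller files than 'numerical' ones is easy to check: the numerical path stores one float per pixel, while the PNG path stores one compressed byte string per mask. A self-contained sketch of the comparison:

# Sketch: compare the two mask encodings supported by the function above.
import io
import numpy as np
import PIL.Image

mask = np.random.randint(2, size=(256, 256)).astype(np.uint8)

num_floats = mask.size  # 'numerical': 65536 floats in the FloatList

output = io.BytesIO()
PIL.Image.fromarray(mask).save(output, format='PNG')  # 'png': one byte string
print(len(output.getvalue()), 'PNG bytes vs', num_floats, 'floats')
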
Example no. 17
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
  """Populates a TF Example message with image annotations from a data frame.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image.
    label_map: String to integer label map.
    encoded_image: The encoded image string

  Returns:
    The populated TF Example, if the label of at least one object is present in
    label_map. Otherwise, returns None.
  """

  filtered_data_frame = annotations_data_frame[
      annotations_data_frame.LabelName.isin(label_map)]

  image_id = annotations_data_frame.ImageID.iloc[0]

  feature_map = {
      standard_fields.TfExampleFields.object_bbox_ymin:
          dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_xmin:
          dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_ymax:
          dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_xmax:
          dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
      standard_fields.TfExampleFields.object_class_text:
          dataset_util.bytes_list_feature(
              filtered_data_frame.LabelName.as_matrix()),
      standard_fields.TfExampleFields.object_class_label:
          dataset_util.int64_list_feature(
              filtered_data_frame.LabelName.map(lambda x: label_map[x])
              .as_matrix()),
      standard_fields.TfExampleFields.filename:
          dataset_util.bytes_feature('{}.jpg'.format(image_id).encode('utf8')),
      standard_fields.TfExampleFields.source_id:
          dataset_util.bytes_feature(image_id.encode('utf8')),
      standard_fields.TfExampleFields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  if 'IsGroupOf' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_group_of] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsGroupOf.as_matrix().astype(int))
  if 'IsOccluded' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_occluded] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsOccluded.as_matrix().astype(int))
  if 'IsTruncated' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_truncated] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsTruncated.as_matrix().astype(int))
  if 'IsDepiction' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_depiction] = dataset_util.int64_list_feature(
                    filtered_data_frame.IsDepiction.as_matrix().astype(int))

  return tf.train.Example(features=tf.train.Features(feature=feature_map))
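
A hypothetical call to the function above, with a single-image annotation frame in the Open Images CSV layout it expects; the file name and all values are illustrative only:

import pandas as pd

df = pd.DataFrame({
    'ImageID': ['img0001', 'img0001'],
    'LabelName': ['/m/cat', '/m/dog'],
    'XMin': [0.1, 0.5], 'XMax': [0.4, 0.9],
    'YMin': [0.2, 0.1], 'YMax': [0.8, 0.7],
})
label_map = {'/m/cat': 1, '/m/dog': 2}
with open('img0001.jpg', 'rb') as f:  # image path is an assumption
    tf_example = tf_example_from_annotations_data_frame(df, label_map, f.read())
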
Example no. 18
def tf_example_from_annotations_data_frame(annotations_data_frame, label_maps,
                                           encoded_image):
  """Populates a TF Example message with image annotations from a data frame.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image.
    label_maps: list of label maps (first = labels, second = countries)
    encoded_image: The encoded image string

  Returns:
    The populated TF Example, if the label of at least one object is present in
    label_map. Otherwise, returns None.
  """

  label_map, country_map = label_maps
  
  filtered_data_frame = annotations_data_frame[
      annotations_data_frame.LabelName.isin(label_map)]
  filtered_data_frame = filtered_data_frame[
      filtered_data_frame.country.isin(country_map)]
  filtered_data_frame_boxes = filtered_data_frame[
      ~filtered_data_frame.YMin.isnull()]
  filtered_data_frame_labels = filtered_data_frame[
      filtered_data_frame.YMin.isnull()]
  image_id = annotations_data_frame.ImageID.iloc[0]

  feature_map = {
      standard_fields.TfExampleFields.object_bbox_ymin:
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.YMin.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_xmin:
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.XMin.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_ymax:
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.YMax.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_xmax:
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.XMax.as_matrix()),
      standard_fields.TfExampleFields.object_class_text:
          dataset_util.bytes_list_feature(
              filtered_data_frame_boxes.LabelName.as_matrix()),
      standard_fields.TfExampleFields.object_class_label:
          dataset_util.int64_list_feature(
              filtered_data_frame_boxes.LabelName.map(lambda x: label_map[x])
              .as_matrix()),
      standard_fields.TfExampleFields.country_class_text:
          dataset_util.bytes_list_feature(
              filtered_data_frame_boxes.country.as_matrix()),
      standard_fields.TfExampleFields.country_class_label:
          dataset_util.int64_list_feature(
              filtered_data_frame_boxes.country.map(lambda x: country_map[x])
              .as_matrix()),
      standard_fields.TfExampleFields.filename:
          dataset_util.bytes_feature('{}.jpg'.format(image_id).encode('utf8')),
      standard_fields.TfExampleFields.source_id:
          dataset_util.bytes_feature(image_id.encode('utf8')),
      standard_fields.TfExampleFields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  if 'IsGroupOf' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_group_of] = dataset_util.int64_list_feature(
                    filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int))
  if 'IsOccluded' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_occluded] = dataset_util.int64_list_feature(
                    filtered_data_frame_boxes.IsOccluded.as_matrix().astype(
                        int))
  if 'IsTruncated' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_truncated] = dataset_util.int64_list_feature(
                    filtered_data_frame_boxes.IsTruncated.as_matrix().astype(
                        int))
  if 'IsDepiction' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_depiction] = dataset_util.int64_list_feature(
                    filtered_data_frame_boxes.IsDepiction.as_matrix().astype(
                        int))

  if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
    feature_map[standard_fields.TfExampleFields.
                image_class_label] = dataset_util.int64_list_feature(
                    filtered_data_frame_labels.LabelName.map(
                        lambda x: label_map[x]).as_matrix())
    feature_map[standard_fields.TfExampleFields.
                image_class_text] = dataset_util.bytes_list_feature(
                    filtered_data_frame_labels.LabelName.as_matrix())
  return tf.train.Example(features=tf.train.Features(feature=feature_map))
Example no. 19
def main(_):

    # Write classes file
    os.makedirs(os.path.dirname(config.CLASSES_FILE), exist_ok=True)
    with open(config.CLASSES_FILE, "w") as f:
        for label, id in config.CLASSES.items():
            f.write("item {\n\tid: " + str(id) + "\n\tname: '" + label +
                    "'\n}\n")

    annotations = {}
    # Parse annotation file
    rows = open(config.ANNOTATION_PATH).read().strip().split("\n")
    # Ignore first line header
    for row in rows[1:]:
        # Get image path and bounding box coordinates
        row, _ = row.split(",")
        img_path, label, x1, y1, x2, y2, _ = row.split(";")
        if label not in config.CLASSES:
            continue

        img_path = os.path.sep.join([config.BASE_PATH, img_path])
        x1, y1, x2, y2 = map(float, [x1, y1, x2, y2])

        # Add image path and bounding box information to dictionary
        info = annotations.get(img_path, [])
        info.append((label, (x1, y1, x2, y2)))
        annotations[img_path] = info

    train_paths, test_paths = train_test_split(list(annotations.keys()),
                                               test_size=config.TEST_SIZE,
                                               random_state=42)

    datasets = [("train", train_paths, config.TRAIN_RECORD),
                ("test", test_paths, config.TEST_RECORD)]

    for type, img_paths, output_path in datasets:
        print(f"[INFO] Building {output_path} ...")
        writer = tf.python_io.TFRecordWriter(output_path)

        for img_path in tqdm.tqdm(img_paths):
            # Load image as tf object
            encoded = tf.gfile.GFile(img_path, "rb").read()

            # Get image size
            image = Image.open(io.BytesIO(encoded))
            width, height = image.size[:2]

            # Get filename and format
            filename = img_path.split(os.path.sep)[-1]
            img_format = filename[filename.rfind(".") + 1:]

            # Get bounding boxes
            xmins, xmaxs = [], []
            ymins, ymaxs = [], []
            classes_texts, classes = [], []
            for label, (x1, y1, x2, y2) in annotations[img_path]:
                xmins.append(x1 / width)
                xmaxs.append(x2 / width)
                ymins.append(y1 / height)
                ymaxs.append(y2 / height)
                classes_texts.append(label.encode("utf8"))
                classes.append(config.CLASSES[label])

            features = tf.train.Features(
                feature={
                    "image/height":
                    dataset_util.int64_feature(height),
                    "image/width":
                    dataset_util.int64_feature(width),
                    "image/filename":
                    dataset_util.bytes_feature(filename.encode("utf8")),
                    "image/source_id":
                    dataset_util.bytes_feature(filename.encode("utf8")),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded),
                    "image/format":
                    dataset_util.bytes_feature(img_format.encode("utf8")),
                    "image/object/bbox/xmin":
                    dataset_util.float_list_feature(xmins),
                    "image/object/bbox/xmax":
                    dataset_util.float_list_feature(xmaxs),
                    "image/object/bbox/ymin":
                    dataset_util.float_list_feature(ymins),
                    "image/object/bbox/ymax":
                    dataset_util.float_list_feature(ymaxs),
                    "image/object/class/text":
                    dataset_util.bytes_list_feature(classes_texts),
                    "image/object/class/label":
                    dataset_util.int64_list_feature(classes)
                })
            example = tf.train.Example(features=features)
            writer.write(example.SerializeToString())
        writer.close()
Example no. 20
def create_tf_example(image,
                      annotations_list,
                      caption_annotations_list,
                      zip_handler,
                      category_index,
                      include_masks=False,
                      subdir=None,
                      proposal_data_path=None):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    caption_annotations_list:
      list of dicts with keys:
      [u'image_id', u'id', u'caption']
    zip_handler: class for reading and writing ZIP files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result. default: False.
    subdir: Subdirectory inside the ZIP archive that holds the image files.
    proposal_data_path: Path to a directory of per-image .npy files holding
      precomputed proposal boxes ([ymin, xmin, ymax, xmax] rows).
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  with zip_handler.open(subdir + '/' + filename, "r") as fid:
    encoded_jpg = fid.read()

  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  npy_path = os.path.join(proposal_data_path, '{}/{}.npy'.format(
      image_id % 10, image_id))

  with open(npy_path, 'rb') as fid:
    proposals = np.load(fid)

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())

  caption_string = []
  caption_offset = []
  caption_length = []
  for caption_annotations in caption_annotations_list:
    caption = _process_caption(caption_annotations['caption'])
    caption_offset.append(len(caption_string))
    caption_length.append(len(caption))
    caption_string.extend(caption)
  caption_string = [caption.encode('utf8') for caption in caption_string]

  feature_dict = {
      'image/height':
      dataset_util.int64_feature(image_height),
      'image/width':
      dataset_util.int64_feature(image_width),
      'image/filename':
      dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
      dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
      dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
      dataset_util.bytes_feature(encoded_jpg),
      'image/format':
      dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
      dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
      dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
      dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
      dataset_util.float_list_feature(ymax),
      'image/object/class/label':
      dataset_util.int64_list_feature(category_ids),
      'image/object/class/text':
      dataset_util.bytes_list_feature(category_names),
      'image/object/is_crowd':
      dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
      dataset_util.float_list_feature(area),
      'image/caption/string':
      dataset_util.bytes_list_feature(caption_string),
      'image/caption/offset':
      dataset_util.int64_list_feature(caption_offset),
      'image/caption/length':
      dataset_util.int64_list_feature(caption_length),
      'image/proposal/bbox/ymin':
      dataset_util.float_list_feature(proposals[:, 0].tolist()),
      'image/proposal/bbox/xmin':
      dataset_util.float_list_feature(proposals[:, 1].tolist()),
      'image/proposal/bbox/ymax':
      dataset_util.float_list_feature(proposals[:, 2].tolist()),
      'image/proposal/bbox/xmax':
      dataset_util.float_list_feature(proposals[:, 3].tolist()),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
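
The caption offset/length pair above flattens a ragged list of tokenized captions into one token list; caption i can be recovered with a slice, as in this sketch:

# Sketch: invert the caption flattening performed above.
def get_caption(caption_string, caption_offset, caption_length, i):
    start = caption_offset[i]
    return caption_string[start:start + caption_length[i]]
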
Example no. 21
def create_tf_example(image_path, img, xmin, ymin, xmax, ymax, classes, classes_text=(),
                      truncated=(), poses=(), difficult_obj=(), source_id='', coordinate_normalize=False):
    """
    创建 tf example 实例
    :param img:
    :param image_path: 图片路径
    :param xmin: 多边形的boundingbox
    :param ymin:
    :param xmax:
    :param ymax:
    :param classes: 类别
    :param classes_text:
    :param truncated:
    :param poses:
    :param difficult_obj:
    :param source_id:
    :return: 创建好的tf example
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()
    height = img.shape[0]
    width = img.shape[1]
    if coordinate_normalize:
        xmin = [float(x) / width for x in xmin]
        xmax = [float(x) / width for x in xmax]
        ymin = [float(y) / height for y in ymin]
        ymax = [float(y) / height for y in ymax]
    example = tf.train.Example(
        features=tf.train.Features(feature={
            'image/height':
                dataset_util.int64_feature(height),
            'image/width':
                dataset_util.int64_feature(width),
            'image/filename':
                dataset_util.bytes_feature(
                    os.path.basename(image_path).encode('utf8')),
            'image/source_id':
                dataset_util.bytes_feature(
                    os.path.basename(source_id).encode('utf8')),
            'image/key/sha256':
                dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
                dataset_util.bytes_feature(encoded_jpg),
            'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymax),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            'image/object/difficult':
                dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
                dataset_util.int64_list_feature(truncated),
            'image/object/view':
                dataset_util.bytes_list_feature(poses),
        }))
    return example
def create_tf_example(annotation_dict_key, annotation_dict_value, image_path):
    """
    Generates a tf Example for each image represented by dictionary key and the
    bounding boxes and labels represented by annotation_dict_value, with the image path
    """
    #print(annotation_dict)
    image_format = b'png'  #change to jpg or jpeg if required
    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    #Read the images from the corresponding folder based on type
    bboxes_class = list(annotation_dict_value.split(" "))
    if len(bboxes_class) % 5 == 0:
        # Each box contributes 5 tokens (class + 4 coordinates); images whose
        # annotation length is not a multiple of 5 are skipped entirely.
        print(bboxes_class)
        with tf.io.gfile.GFile(os.path.join(image_path, annotation_dict_key),
                               'rb') as fid:
            encoded_png = fid.read()
        encoded_png_io = io.BytesIO(encoded_png)
        key = hashlib.sha256(encoded_png).hexdigest().encode('utf8')
        image = Image.open(encoded_png_io)
        width, height = image.size
        filename = annotation_dict_key.encode('utf8')

        for i in range(len(bboxes_class)):
            if i % 5 == 0:
                classes.append(int(bboxes_class[i]))
            elif i % 5 == 1:
                xmins.append(np.float32(bboxes_class[i]) / width)
            elif i % 5 == 2:
                ymins.append(np.float32(bboxes_class[i]) / height)
            elif i % 5 == 3:
                xmaxs.append(np.float32(bboxes_class[i]) / width)
            else:
                ymaxs.append(np.float32(bboxes_class[i]) / height)

        classes_text = list(map(int_to_class_label, classes))
        # Pack the bounding boxes, labels and file information into a
        # TensorFlow example. Fundamentally, a tf.Example is a
        # {"string": tf.train.Feature} mapping.
        feature_dict = {
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_png),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/key/sha256':
            dataset_util.bytes_feature(key)  #Generate your own
        }
        tf_example = tf.train.Example(features=tf.train.Features(
            feature=feature_dict))
        return tf_example
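For reference, a sketch of the annotation string this function expects: space-separated 5-tuples, each a class id followed by xmin, ymin, xmax and ymax in pixels (the values below are illustrative).

# Illustrative input for create_tf_example above: two boxes for one image.
annotation_dict_key = 'image_001.png'  # hypothetical filename
annotation_dict_value = '1 10 20 110 220 2 30 40 130 240'
tokens = annotation_dict_value.split(' ')
assert len(tokens) % 5 == 0  # one class id plus four coordinates per box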
Example no. 23
def create_tf_example(data, label_map_dict, image_dir, ignore_occluded=False):
  """Convert YAML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding YAML fields for a single image
    label_map_dict: A map from string label names to integers ids.
    image_dir: String specifying the directory holding the actual image data.
    ignore_occluded: Whether to skip occluded instances in the
      dataset  (default: False).

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['path'] is not a valid PNG
  """
  filename = data['path']
  img_path = os.path.join(image_dir, filename)

  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_png)
  image = PIL.Image.open(encoded_png_io)
  if image.format != 'PNG':
    raise ValueError('Image format not PNG')
  key = hashlib.sha256(encoded_png).hexdigest()

  # Image dimensions are fixed for this dataset (1280x720 frames).
  width = 1280
  height = 720

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []

  for obj in data['boxes']:

    if ignore_occluded and obj['occluded']:
      continue

    xmin.append(float(obj['x_min']) / width)
    ymin.append(float(obj['y_min']) / height)
    xmax.append(float(obj['x_max']) / width)
    ymax.append(float(obj['y_max']) / height)

    class_name = obj['label']
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          filename.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_png),
      'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  return example
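Several of these converters take a label_map_dict mapping class names to integer ids. A minimal sketch of building one, assuming the Object Detection API's label_map_util and an illustrative label map path:

# Sketch: build the label_map_dict consumed by the converters above.
# Assumes the TensorFlow Object Detection API is installed; the .pbtxt path
# is illustrative.
from object_detection.utils import label_map_util

label_map_dict = label_map_util.get_label_map_dict('data/label_map.pbtxt')
# e.g. {'green': 1, 'red': 2, ...} depending on the label map contents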
Example no. 24
def dict_to_tf_example(data, label_map_dict, image_subdirectory):
    """Convert XML derived dict to tf.Example proto.
    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.
    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: String specifying subdirectory within the
        Pascal dataset directory holding the actual image data.
    Returns:
      example: The converted tf.Example.
    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()
    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'object' in data:
        for obj in data['object']:
            difficult_obj.append(int(0))
            xmin = float(obj['bndbox']['xmin'])
            xmax = float(obj['bndbox']['xmax'])
            ymin = float(obj['bndbox']['ymin'])
            ymax = float(obj['bndbox']['ymax'])
            xmins.append(xmin / width)
            ymins.append(ymin / height)
            xmaxs.append(xmax / width)
            ymaxs.append(ymax / height)
            class_name = obj["name"]
            classes_text.append(class_name.encode('utf8'))
            classes.append(label_map_dict[class_name])
            truncated.append(int(0))
            poses.append('Unspecified'.encode('utf8'))

    feature_dict = {
        'image/height':
        dataset_util.int64_feature(height),
        'image/width':
        dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated),
        'image/object/view':
        dataset_util.bytes_list_feature(poses),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
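A sketch of how the data dict consumed above is typically produced from a PASCAL VOC annotation file, following the Object Detection API's usual pattern (the annotation path is illustrative):

# Sketch: parse a PASCAL VOC XML annotation into the dict expected above.
import tensorflow as tf
from lxml import etree
from object_detection.utils import dataset_util

with tf.gfile.GFile('annotations/image_001.xml', 'r') as fid:
    xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']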
Example no. 25
def json_to_tf_example(json_data,
                       dataset_directory,
                       label_map_dict ):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      dataset_directory: Path to root directory holding PASCAL dataset
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by json_data['filename'] is not a valid JPEG
    """
    filename = json_data.get("filename")
    full_path = os.path.join(FLAGS.data_dir, "photos", filename)

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()
    width, height = image.size

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    for obj in json_data.get("bndboxes"):
        difficult_obj.append(0)
        xmin.append(float(obj.get("x")) / width)
        ymin.append(float(obj.get("y")) / height)
        xmax.append(numpy.clip(float(obj.get("x")+obj.get("w")) / width,0,1))
        ymax.append(numpy.clip(float(obj.get("y")+obj.get("h")) / height,0,1))
        classes_text.append(obj.get("id").encode('utf8'))
        classes.append(label_map_dict[obj.get("id")])
        truncated.append(int(0))
        poses.append("Unspecified".encode('utf8'))


    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
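A sketch of the JSON structure this converter reads, inferred from the field accesses above (all values illustrative):

# Illustrative json_data for json_to_tf_example, inferred from the code above.
json_data = {
    "filename": "photo_001.jpg",  # resolved under FLAGS.data_dir/photos
    "bndboxes": [
        # x/y is the top-left corner, w/h the box size; id keys label_map_dict
        {"x": 10, "y": 20, "w": 100, "h": 200, "id": "label_a"},
    ],
}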
Example no. 26
def create_tf_examples(ids, images_dir, db_path):
    """Generates a TFRecord for given set of training examples.

	Inputs:
		ids         - list of training example IDs
		images_dir  - absolute path to directory containing images
		db_path     - absolute path to database with metadata and labels

	"""

    # list of training examples
    examples = []

    # open the database
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # populate the list of training examples
    for id in ids:
        # get image dimensions
        img_path = images_dir + '/' + str(id) + '.jpg'
        img = cv.imread(img_path, cv.IMREAD_COLOR)
        height, width, channels = img.shape

        # populate dictionary with training example data
        cursor.execute("SELECT * FROM TrainingExample WHERE id=?", (id, ))
        row = cursor.fetchone()
        example = {
            'id': row[0],
            'img_path': img_path,
            'img_height': height,
            'img_width': width,
            'camera_angle': row[2],
            'camera_height': row[3],
            'light_angle': row[4],
            'labels': []
        }

        # populate label information
        cursor.execute("SELECT * FROM Label WHERE image_id=?", (id, ))
        for row in cursor.fetchall():
            example['labels'].append({
                'id': row[0],
                'img_id': row[1],
                'xmin': min(row[2], row[3]),
                'xmax': max(row[2], row[3]),
                'ymin': min(row[4], row[5]),
                'ymax': max(row[4], row[5])
            })

        examples.append(example)

    # close the database
    cursor.close()
    conn.close()

    # Create TFRecord
    tf_examples = []
    for example in examples:
        # encode image bytes
        encoded_image_data = tf.gfile.FastGFile(example['img_path'],
                                                'rb').read()

        # Only single-label examples are kept; everything else is skipped.
        if len(example['labels']) != 1:
            continue

        # normalize label coordinates
        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []
        classes_text = []
        classes = []
        for label in example['labels']:
            xmins.append(label['xmin'] / example['img_width'])
            xmaxs.append(label['xmax'] / example['img_width'])
            ymins.append(label['ymin'] / example['img_height'])
            ymaxs.append(label['ymax'] / example['img_height'])
            classes_text.append(b'rock')
            classes.append(1)

        # populate a TFRecord entry for the training example
        tf_examples.append(
            tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height':
                    dataset_util.int64_feature(example['img_height']),
                    'image/width':
                    dataset_util.int64_feature(example['img_width']),
                    'image/filename':
                    dataset_util.bytes_feature(example['img_path'].encode(
                        'utf8')),
                    'image/source_id':
                    dataset_util.bytes_feature(example['img_path'].encode(
                        'utf8')),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_image_data),
                    'image/format':
                    dataset_util.bytes_feature(b'jpeg'),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text':
                    dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
                })))

    # Return the TFRecord entries
    return tf_examples
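The database layout is not shown; a hedged reconstruction from the two queries above, with column order inferred from the row indexing and all column names beyond the two table names hypothetical:

# Hedged reconstruction of the SQLite schema assumed by create_tf_examples.
import sqlite3

conn = sqlite3.connect('labels.db')  # illustrative path
conn.executescript("""
CREATE TABLE TrainingExample (
    id INTEGER PRIMARY KEY,
    source TEXT,           -- row[1], unused above (hypothetical name)
    camera_angle REAL,     -- row[2]
    camera_height REAL,    -- row[3]
    light_angle REAL       -- row[4]
);
CREATE TABLE Label (
    id INTEGER PRIMARY KEY,
    image_id INTEGER,      -- row[1]
    x1 REAL, x2 REAL,      -- row[2], row[3]: min/max taken above
    y1 REAL, y2 REAL       -- row[4], row[5]
);
""")
conn.close()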
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
  full_path = os.path.join(dataset_directory, img_path)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue

    difficult_obj.append(int(difficult))

    xmin.append(float(obj['bndbox']['xmin']) / width)
    ymin.append(float(obj['bndbox']['ymin']) / height)
    xmax.append(float(obj['bndbox']['xmax']) / width)
    ymax.append(float(obj['bndbox']['ymax']) / height)
    classes_text.append(obj['name'].encode('utf8'))
    classes.append(label_map_dict[obj['name']])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return example
Example no. 28
    def lab_format_to_tf_example(self, json_content):
        # turn the lab-format directory into the format accepted by tfrecord

        in_img_path = os.path.join(json_content["folder"],
                                   json_content["filename"])

        with tf.gfile.GFile(in_img_path, 'rb') as fid:
            encoded_img = fid.read()

        size, pix = image.get_img_info(in_img_path)
        width = int(size[0])
        height = int(size[1])

        filename = json_content['filename'].encode('utf8')
        _, file_extension = os.path.splitext(json_content['filename'])

        # get image format. e.g. '.jpg'
        file_extension = file_extension.replace('.', '')
        image_format = '{}'.format(file_extension).encode()

        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []
        classes_text = []
        classes = []

        for anno in json_content['annotations']:
            if anno['label'] not in self.classes:
                continue
            xmins.append(anno['left'] / width)
            xmaxs.append(anno['right'] / width)
            ymins.append(anno['top'] / height)
            ymaxs.append(anno['bottom'] / height)
            classes_text.append(anno['label'].encode('utf8'))
            classes.append(self.class_to_int(anno['label']))

        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(filename),
                'image/source_id':
                dataset_util.bytes_feature(filename),
                'image/encoded':
                dataset_util.bytes_feature(encoded_img),
                'image/format':
                dataset_util.bytes_feature(image_format),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            }))
        return tf_example
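A sketch of the lab-format JSON this method consumes, inferred from the field accesses above (values illustrative):

# Illustrative lab-format input for lab_format_to_tf_example.
json_content = {
    "folder": "/data/images",  # joined with filename to locate the image
    "filename": "frame_0001.jpg",
    "annotations": [
        # pixel coordinates; label must appear in self.classes to be kept
        {"label": "car", "left": 10, "right": 110, "top": 20, "bottom": 220},
    ],
}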
Example no. 29
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
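A worked instance of the COCO box conversion performed above, with illustrative numbers:

# Worked example of the bbox normalization above (numbers illustrative).
image_width, image_height = 640, 480
x, y, width, height = 40.0, 50.0, 100.0, 80.0  # COCO [x, y, w, h] in pixels

xmin = x / image_width               # 0.0625
xmax = (x + width) / image_width     # 0.21875
ymin = y / image_height              # ~0.10417
ymax = (y + height) / image_height   # ~0.27083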
Example no. 30
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for idx, object_annotations in enumerate(annotations_list):
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            binary_mask = mask.decode(object_annotations['segmentation'])
            # Reinterpret the decoded buffer from column-major (Fortran) to
            # row-major (C) layout while keeping the original shape.
            mask_shape = binary_mask.shape
            binary_mask = np.ravel(binary_mask, order='F')
            binary_mask = binary_mask.reshape(mask_shape, order='C')

            if FLAGS.dump_masks:
                binary_mask[binary_mask > 0] = 255
                pil_image = PIL.Image.fromarray(binary_mask)
                pil_image.save(FLAGS.train_image_dir + "/mask_" +
                               str(image_id) + "_" + str(idx) + ".png")
            else:
                pil_image = PIL.Image.fromarray(binary_mask)

            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
    def _create_tf_example(self, image_dir, image_filename):

        image_name = os.path.splitext(image_filename)[0]
        # splitext keeps the leading dot; strip it so the format is e.g. 'jpg'
        image_format = os.path.splitext(image_filename)[1].lstrip('.')

        image_path = image_dir + "/" + image_filename

        with open(image_path, 'rb') as image_file:
            image_data = image_file.read()
            image_data = bytearray(image_data)

        img = Image.open(image_path)
        width, height = img.size

        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []
        classes_text = []
        classes = []
        with open(self.bbox_dir + "/" + image_name + ".txt",
                  'r') as annotation_file:
            annotation_lines = annotation_file.readlines()
            annotation_lines.pop(0)  # the first line holds no box; skip it
            for annotation_line in annotation_lines:
                coords = annotation_line.strip().split(" ")
                x1 = int(coords[0])
                y1 = int(coords[1])
                x2 = int(coords[2])
                y2 = int(coords[3])
                annotation_label = coords[4]
                annotation_class_id = self.labels.index(annotation_label)

                xmins.append(x1 / width)
                xmaxs.append(x2 / width)
                ymins.append(y1 / height)
                ymaxs.append(y2 / height)
                classes_text.append(bytes(annotation_label, 'utf-8'))
                classes.append(annotation_class_id)

        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(bytes(image_filename, 'utf-8')),
                'image/source_id':
                dataset_util.bytes_feature(bytes(image_filename, 'utf-8')),
                'image/encoded':
                dataset_util.bytes_feature(bytes(image_data)),
                'image/format':
                dataset_util.bytes_feature(bytes(image_format, 'utf-8')),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            }))

        return tf_example
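A sketch of the annotation .txt file this method expects, inferred from the parsing above: the first line is discarded, and each remaining line holds four integer pixel coordinates followed by a label (contents illustrative):

# Illustrative contents of <bbox_dir>/<image_name>.txt for _create_tf_example.
# The first line is dropped by pop(0); each following line is one box.
sample_annotation = """2
10 20 110 220 car
30 40 130 240 person
"""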
def create_tf_example(root,image_file):
  # Check if annotation xml file exists
  filename, file_extension = os.path.splitext(image_file)

  with tf.gfile.GFile(os.path.join(root, image_file), 'rb') as fid:
     encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)

  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')


  width, height = image.size
  print(image_file, width, height)
  BB = get_cordinates(filename)
  xmin = []
  ymin = []
  ymax = []
  xmax = []
  classes_id = []
  classes_text = []

  
  # BB holds all labels; each item carries 4 coordinates, a class id and a
  # class name.
  if BB:
    for item in BB:
        coordinates = item[1]                
        Y=[]
        X=[]
        for coord in coordinates:
            Y.append(coord['y'])
            X.append(coord['x'])
            
        xmin.append(float(min(X)/width))
        xmax.append(float(max(X)/width))
        ymin.append(float(min(Y)/height))
        ymax.append(float(max(Y)/height)) 
        classes_id.append(get_class_id_4_classes(item[0]))
        classes_text.append(item[0].encode('utf8'))

    filename = image_file
        
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        #'image/filename': tf.train.Feature(bytes_list=tf.train.BytesList(value = [bytes(image_file, encoding= 'utf-8')])),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),        
        'image/object/class/label': dataset_util.int64_list_feature(classes_id),
    }))
    return tf_example
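The shape of the value returned by get_cordinates is implied by the loop above; a hedged illustration, with names and numbers hypothetical:

# Hedged illustration of what get_cordinates(filename) is expected to return:
# a list of (class_name, coordinates) pairs, where coordinates is a list of
# {'x': ..., 'y': ...} points whose extremes bound the object.
BB = [
    ('car', [{'x': 10, 'y': 20}, {'x': 110, 'y': 20},
             {'x': 110, 'y': 220}, {'x': 10, 'y': 220}]),
]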
Example no. 33
def create_tf_example(example):

    # Bosch
    height = 720  # Image height
    width = 1280  # Image width

    # Filename of the image. Empty if the image is not from a file.
    filename = example['path'].encode()

    with tf.gfile.GFile(example['path'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'png'.encode()

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class names of bounding box (1 per box)
    classes = []  # List of integer class ids of bounding box (1 per box)

    for box in example['boxes']:
        xmins.append(float(box['x_min'] / width))
        xmaxs.append(float(box['x_max'] / width))
        ymins.append(float(box['y_min'] / height))
        ymaxs.append(float(box['y_max'] / height))
        classes_text.append(box['label'].encode())
        classes.append(int(LABEL_DICT[box['label']]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
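LABEL_DICT is defined elsewhere; a hedged sketch of its shape for this converter, with label names and ids hypothetical:

# Hedged sketch of the LABEL_DICT used above; names and ids are hypothetical
# and must match the label map used for training.
LABEL_DICT = {
    'Green': 1,
    'Red': 2,
    'Yellow': 3,
    'off': 4,
}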
def dict_to_tf_example(data, image_subdirectory='JPEGImages'):
    """
    Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image pointed to by data['file']
                    is not a valid JPEG
    """

    full_path = os.path.join('/home/lion/dataset', data['file'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    poses = []
    classes = []
    classes_text = []

    width = int(data['width'])
    height = int(data['height'])
    xmin.append(float(data['bbox']['x1']) / width)
    xmax.append(float(data['bbox']['x2']) / width)

    if int(data['category_class']) == 2:  # exception for the bottom class
        ymin.append(float(data['bbox']['y1'] + 8) / height)
    else:
        ymin.append(float(data['bbox']['y1']) / height)

    if int(data['category_class']) == 1:  # exception for the top class
        ymax.append(float(data['bbox']['y2'] - 8) / height)
    else:
        ymax.append(float(data['bbox']['y2']) / height)

    classes_text.append(data['category_name'].encode('utf8'))
    classes.append(int(data['category_class']))
    difficult = [0]
    truncated = [0]
    poses.append('Frontal'.encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['file'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(str(data['_id']).encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Example no. 35
def create_tf_example(image, annotations_list, include_masks=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'xmin', u'ymin', u'xmax', u'ymax', u'label_idx', u'label_name']
      (and u'segmentation'/u'iscrowd' when include_masks is set).
      Unlike the official COCO format, the box coordinates are passed through
      unchanged, so they are assumed to be already normalized to the
      [ymin, xmin, ymax, xmax] convention expected by the Tensorflow Object
      Detection API.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    # The filename doubles as the source id and as the full image path here.
    image_id = filename
    full_path = filename
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        # Box coordinates are taken as-is from the annotation dict; unlike
        # the official COCO script, no validity check or normalization is
        # applied here.
        xmin.append(object_annotations['xmin'])
        ymin.append(object_annotations['ymin'])
        xmax.append(object_annotations['xmax'])
        ymax.append(object_annotations['ymax'])

        is_crowd.append(False)
        category_id = object_annotations['label_idx']
        category_ids.append(category_id)
        category_names.append(object_annotations['label_name'].encode('utf8'))
        area.append(0)

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
Example no. 36
def dict_to_tf_example(data, image_dir, label_map_dict):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding
    box coordinates provided by the raw data.

    Arguments:
        data: dict holding XML fields for a single image (obtained by
          running dataset_util.recursive_parse_xml_to_dict)
        image_dir: Path to image directory.
        label_map_dict: A map from string label names to integers ids.

    Returns:
        example: The converted tf.Example.
    """
    full_path = os.path.join(image_dir, data['filename'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    try:
        for obj in data['object']:
            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
    except KeyError:
        print(data['filename'] + ' without objects!')

    difficult_obj = [0] * len(classes)
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj)
        }))
    return example
def dict_to_tf_example(data, annotations_dir, images_dir, label_map_dict,
                       include_masks, ignore_difficult_instances):
    image_path = os.path.join(images_dir, data['filename'])
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()
    width = int(data['size']['width'])
    height = int(data['size']['height'])
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    masks = []
    if 'object' in data:
        for idx, obj in enumerate(data['object']):
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue
            difficult_obj.append(int(difficult))
            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            class_id = getClassId(obj['name'], label_map_dict)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(class_id)
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

            if include_masks:
                mask_path = os.path.join(
                    annotations_dir, "masks",
                    os.path.splitext(data['filename'])[0] + '_' + str(idx) +
                    '.png')
                with tf.gfile.GFile(mask_path, 'rb') as fid:
                    encoded_mask_png = fid.read()
                encoded_png_io = io.BytesIO(encoded_mask_png)
                mask = PIL.Image.open(encoded_png_io)
                if mask.format != 'PNG':
                    raise ValueError('Mask format not PNG')
                mask_np = np.asarray(mask)
                mask_remapped = (mask_np == 255).astype(np.uint8)
                masks.append(mask_remapped)
    feature_dict = {
        'image/height':
        dataset_util.int64_feature(height),
        'image/width':
        dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated),
        'image/object/view':
        dataset_util.bytes_list_feature(poses),
    }

    if include_masks:
        encoded_mask_png_list = []
        for mask in masks:
            img = PIL.Image.fromarray(mask)
            output = io.BytesIO()
            img.save(output, format='PNG')
            encoded_mask_png_list.append(output.getvalue())
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png_list))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
Example no. 38
def dict_to_tf_example(data, label_map_dict, img_path):

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = image.width
    height = image.height

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []

    for shape in data['Layers']['Layer']['Shapes']['Shape']:

        text = shape['BlockText']['Text'].text
        if not (text.startswith('Panel') or text.startswith('panel')):
            continue

        attrib = shape['Data']['Extent'].attrib
        x = float(attrib['X'])
        y = float(attrib['Y'])
        w = float(attrib['Width'])
        h = float(attrib['Height'])

        xmin = x
        xmax = x + w
        ymin = y
        ymax = y + h

        xmin /= width
        ymin /= height
        xmax /= width
        ymax /= height

        if xmin < 0 or ymin < 0 or xmax > 1.01 or ymax > 1.01:
            print(img_path)

        xmins.append(xmin)
        ymins.append(ymin)
        xmaxs.append(xmax)
        ymaxs.append(ymax)

        class_name = 'Panel'
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
def dict_to_tf_example(data, dataset_directory, set_name, id, full_data):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      dataset_directory: Path to the root directory holding the dataset
      set_name: Name of the set: training, validation or test
      id: Identifier of the image within the set
      full_data: Dict with the full annotation file contents, including the
        'annotations' and 'categories' tables referenced via data['ann_ids'].

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG or PNG
    """
    full_path = os.path.join(dataset_directory, 'images', data['filename'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_image = fid.read()
    encoded_image_io = io.BytesIO(encoded_image)
    image = PIL.Image.open(encoded_image_io)
    if image.format != 'JPEG' and image.format != 'PNG':
        raise ValueError('Image format not JPEG or PNG')
    key = hashlib.sha256(encoded_image).hexdigest()

    width = int(data['width'])
    height = int(data['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    for obj in data['ann_ids']:

        ann = full_data['annotations'][obj]

        xmin.append(float(ann['a_bbox'][0]) / width)
        ymin.append(float(ann['a_bbox'][1]) / height)
        xmax.append(float(ann['a_bbox'][2]) / width)
        ymax.append(float(ann['a_bbox'][3]) / height)
        classes_text.append(
            full_data['categories'][ann['cat_id'][0]]['name'].encode('utf8'))
        classes.append(int(ann['cat_id'][0]))
        # Not sure if these are needed, but I'll leave them for compatibility
        truncated.append(int(0))
        poses.append("Unspecified".encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature(image.format.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
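A converter like this is normally driven by a small loop that serializes each returned proto into a TFRecord file. A hypothetical driver, assuming the annotation JSON also carries an 'images' list whose entries hold the 'filename', 'width', 'height' and 'ann_ids' fields this function reads:

import json
import tensorflow as tf

with tf.gfile.GFile('annotations.json', 'r') as fid:  # path is an assumption
    full_data = json.load(fid)

writer = tf.python_io.TFRecordWriter('train.record')
for image_id, data in enumerate(full_data['images']):
    example = dict_to_tf_example(data, 'dataset_root', 'training',
                                 image_id, full_data)
    writer.write(example.SerializeToString())
writer.close()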
Example no. 40
0
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_mask_png)
  mask = PIL.Image.open(encoded_png_io)
  if mask.format != 'PNG':
    raise ValueError('Mask format not PNG')

  mask_np = np.asarray(mask)
  nonbackground_indices_x = np.any(mask_np != 2, axis=0)
  nonbackground_indices_y = np.any(mask_np != 2, axis=1)
  nonzero_x_indices = np.where(nonbackground_indices_x)
  nonzero_y_indices = np.where(nonbackground_indices_y)

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  masks = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue
    difficult_obj.append(int(difficult))

    if faces_only:
      xmin = float(obj['bndbox']['xmin'])
      xmax = float(obj['bndbox']['xmax'])
      ymin = float(obj['bndbox']['ymin'])
      ymax = float(obj['bndbox']['ymax'])
    else:
      xmin = float(np.min(nonzero_x_indices))
      xmax = float(np.max(nonzero_x_indices))
      ymin = float(np.min(nonzero_y_indices))
      ymax = float(np.max(nonzero_y_indices))

    xmins.append(xmin / width)
    ymins.append(ymin / height)
    xmaxs.append(xmax / width)
    ymaxs.append(ymax / height)
    class_name = get_class_name_from_filename(data['filename'])
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))
    if not faces_only:
      mask_remapped = (mask_np != 2).astype(np.uint8)
      masks.append(mask_remapped)

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }
  if not faces_only:
    if mask_type == 'numerical':
      mask_stack = np.stack(masks).astype(np.float32)
      masks_flattened = np.reshape(mask_stack, [-1])
      feature_dict['image/object/mask'] = (
          dataset_util.float_list_feature(masks_flattened.tolist()))
    elif mask_type == 'png':
      encoded_mask_png_list = []
      for mask in masks:
        img = PIL.Image.fromarray(mask)
        output = io.BytesIO()
        img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())
      feature_dict['image/object/mask'] = (
          dataset_util.bytes_list_feature(encoded_mask_png_list))

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
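The 'png' mask path above stores each binary instance mask as PNG bytes, which is lossless: decoding gives back the original array. A quick self-contained round-trip check (illustrative only, not part of the original snippet):

import io
import numpy as np
import PIL.Image

mask = (np.random.rand(6, 8) > 0.5).astype(np.uint8)  # toy binary mask
buf = io.BytesIO()
PIL.Image.fromarray(mask).save(buf, format='PNG')  # encode as in the snippet
decoded = np.asarray(PIL.Image.open(io.BytesIO(buf.getvalue())))
assert np.array_equal(decoded, mask)  # lossless round trip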
Example no. 41
0
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).

  Returns:
    example: The converted tf.Example, or None if the image cannot be
      converted to JPEG or the annotation contains no objects.
  """
    img_path = os.path.join(image_subdirectory,
                            os.path.basename(data['filename']))
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        logging.warning("Image format not JPEG, trying to convert to JPEG")
        try:
            # Re-encode so the stored bytes actually match 'image/format'.
            image = image.convert('RGB')
            buffer = io.BytesIO()
            image.save(buffer, format='JPEG')
            encoded_jpg = buffer.getvalue()
        except Exception:
            logging.exception("Error converting to JPEG")
            return None

    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    if width < 0 or height < 0:
        width, height = image.size

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    if 'object' not in data:
        return None

    for obj in data['object']:
        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
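The label_map_dict argument used throughout these converters is normally built from a label-map .pbtxt file via the API's label_map_util; the path below is an assumption:

from object_detection.utils import label_map_util

# Maps class names to integer ids, e.g. {'aeroplane': 1, 'bicycle': 2, ...}
label_map_dict = label_map_util.get_label_map_dict('data/pascal_label_map.pbtxt')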
Example no. 42
0
def prepare_example(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

  Args:
    image_path: The complete path to image.
    annotations: A dictionary representing the annotation of a single object
      that appears in the image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    image = np.asarray(image)

    key = hashlib.sha256(encoded_png).hexdigest()

    width = int(image.shape[1])
    height = int(image.shape[0])

    xmin_norm = annotations['2d_bbox_left'] / float(width)
    ymin_norm = annotations['2d_bbox_top'] / float(height)
    xmax_norm = annotations['2d_bbox_right'] / float(width)
    ymax_norm = annotations['2d_bbox_bottom'] / float(height)

    difficult_obj = [0] * len(xmin_norm)

    print(image_path)
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_png),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin_norm),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax_norm),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin_norm),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax_norm),
            'image/object/class/text':
            dataset_util.bytes_list_feature(
                [x.encode('utf8') for x in annotations['type']]),
            'image/object/class/label':
            dataset_util.int64_list_feature(
                [label_map_dict[x] for x in annotations['type']]),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.float_list_feature(annotations['truncated']),
            'image/object/alpha':
            dataset_util.float_list_feature(annotations['alpha']),
            'image/object/3d_bbox/height':
            dataset_util.float_list_feature(annotations['3d_bbox_height']),
            'image/object/3d_bbox/width':
            dataset_util.float_list_feature(annotations['3d_bbox_width']),
            'image/object/3d_bbox/length':
            dataset_util.float_list_feature(annotations['3d_bbox_length']),
            'image/object/3d_bbox/x':
            dataset_util.float_list_feature(annotations['3d_bbox_x']),
            'image/object/3d_bbox/y':
            dataset_util.float_list_feature(annotations['3d_bbox_y']),
            'image/object/3d_bbox/z':
            dataset_util.float_list_feature(annotations['3d_bbox_z']),
            'image/object/3d_bbox/rot_y':
            dataset_util.float_list_feature(annotations['3d_bbox_rot_y']),
        }))

    return example
def create_tf_example(example, label_map_dict):
    mask_paths = get_mask_paths(example)

    with tf.io.gfile.GFile(example, 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)

    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    key = hashlib.sha256(encoded_jpg).hexdigest()

    classes = []
    masks = []
    bboxes = []

    for label, mp in mask_paths.items():
        mask = cv2.imread(mp, cv2.IMREAD_UNCHANGED)
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
        mask = cv2.threshold(mask, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

        ret, labels = cv2.connectedComponents(mask)

        # The first component is the background, so we skip it
        for l in range(1, ret):
            cardmask = np.zeros(labels.shape, dtype=np.uint8)
            cardmask[labels == l] = 1

            if np.sum(cardmask) > 2000:
                bbox = bounding_box(cardmask)
                classes.append(label)
                masks.append(cardmask)
                bboxes.append(bbox)
            else:
                logging.info(
                    "%s: object %s discarded, item too small. Size %d",
                    example, label, np.sum(cardmask))

    width, height = image.size

    filename = example
    encoded_image_data = encoded_jpg  # Encoded image bytes
    image_format = 'jpeg'  # b'jpeg' or b'png'

    # Normalized bounding box coordinates (1 per box); note the y
    # coordinates must be divided by the image height, not the width.
    xmins = [bb[2] / width for bb in bboxes]   # left x
    xmaxs = [bb[3] / width for bb in bboxes]   # right x
    ymins = [bb[0] / height for bb in bboxes]  # top y
    ymaxs = [bb[1] / height for bb in bboxes]  # bottom y
    # String class names and integer class ids (1 per box); materialize
    # the names into a list before `classes` is rebound.
    classes_text = [x.encode('utf8') for x in classes]
    classes = [label_map_dict[x] for x in classes]

    encoded_mask_png_list = []
    for mask in masks:
        img = PIL.Image.fromarray(mask)
        output = io.BytesIO()
        img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/format':
            dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/mask':
            dataset_util.bytes_list_feature(encoded_mask_png_list)
        }))
    return tf_example
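bounding_box is not defined in this snippet; a plausible helper, consistent with the bb[0..3] indexing above, would return a tight (ymin, ymax, xmin, xmax) box in pixel coordinates:

import numpy as np

def bounding_box(mask):
    # Tight box around the nonzero pixels of a binary mask,
    # returned as (ymin, ymax, xmin, xmax) in pixel coordinates.
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    ymin, ymax = np.where(rows)[0][[0, -1]]
    xmin, xmax = np.where(cols)[0][[0, -1]]
    return float(ymin), float(ymax), float(xmin), float(xmax)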
Example no. 44
0
def create_tf_example(example):
    """Converts an example from the cars196 dataset into a format suitable for the
  TensorFlow Object Detection API.

  Args:
    example: A dictionary containing at least the following keys:
      {
        'image': A uint8 Tensor of shape `[height, width, 3]`
          (tf.image.encode_jpeg requires uint8).
        'bbox': A float32 Tensor of shape `[4]` containing the normalized bounding
          box coordinates in the order `[ymin, xmin, ymax, xmax]`.
      }

  Returns:
     encoded TFRecord example.
  """
    height, width = example['image'].shape[:-1]
    filename = b''  # Filename of the image. Empty if image is not from file
    encoded_image_data = tf.image.encode_jpeg(
        example['image']).numpy()  # Encoded image
    image_format = b'jpeg'

    ymins = [example['bbox'][0]]
    xmins = [example['bbox'][1]]
    ymaxs = [example['bbox'][2]]
    xmaxs = [example['bbox'][3]]

    # add 1 to label, since it was stored as [0, 195] instead of [1, 196]
    label = example['label'].numpy() + 1
    classes_text = [CATEGORY_INDEX[label]['name'].encode('utf-8')]  # List of string class names (1 per box)
    classes = [label]  # List of integer class id of bounding box (1 per box)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
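Since this converter calls .numpy(), it assumes TF 2.x eager execution. A hypothetical driver using TensorFlow Datasets (with CATEGORY_INDEX defined elsewhere in the module) could look like:

import tensorflow as tf
import tensorflow_datasets as tfds

ds = tfds.load('cars196', split='train')
with tf.io.TFRecordWriter('cars196_train.record') as writer:
    for example in ds:
        writer.write(create_tf_example(example).SerializeToString())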
def prepare_example(image_path, annotations, label_map_dict):
  """Converts a dictionary with annotations for an image to tf.Example proto.

  Args:
    image_path: The complete path to image.
    annotations: A dictionary representing the annotation of a single object
      that appears in the image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
  with tf.gfile.GFile(image_path, 'rb') as fid:
    encoded_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_png)
  image = pil.open(encoded_png_io)
  image = np.asarray(image)

  key = hashlib.sha256(encoded_png).hexdigest()

  width = int(image.shape[1])
  height = int(image.shape[0])

  xmin_norm = annotations['2d_bbox_left'] / float(width)
  ymin_norm = annotations['2d_bbox_top'] / float(height)
  xmax_norm = annotations['2d_bbox_right'] / float(width)
  ymax_norm = annotations['2d_bbox_bottom'] / float(height)

  difficult_obj = [0]*len(xmin_norm)

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_png),
      'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
      'image/object/class/text': dataset_util.bytes_list_feature(
          [x.encode('utf8') for x in annotations['type']]),
      'image/object/class/label': dataset_util.int64_list_feature(
          [label_map_dict[x] for x in annotations['type']]),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.float_list_feature(
          annotations['truncated']),
      'image/object/alpha': dataset_util.float_list_feature(
          annotations['alpha']),
      'image/object/3d_bbox/height': dataset_util.float_list_feature(
          annotations['3d_bbox_height']),
      'image/object/3d_bbox/width': dataset_util.float_list_feature(
          annotations['3d_bbox_width']),
      'image/object/3d_bbox/length': dataset_util.float_list_feature(
          annotations['3d_bbox_length']),
      'image/object/3d_bbox/x': dataset_util.float_list_feature(
          annotations['3d_bbox_x']),
      'image/object/3d_bbox/y': dataset_util.float_list_feature(
          annotations['3d_bbox_y']),
      'image/object/3d_bbox/z': dataset_util.float_list_feature(
          annotations['3d_bbox_z']),
      'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
          annotations['3d_bbox_rot_y']),
  }))

  return example
Example no. 46
0
def create_tf_example(example):
    # TODO(user): Populate the following variables from your example.
    height = 640  # Image height
    width = 360  # Image width
    filename = example['img_path']  # Filename of the image
    encoded_image_data = tf.gfile.FastGFile(filename, 'rb').read()
    decoded_image = tf.image.decode_jpeg(encoded_image_data)
    decoded_image_resized = tf.cast(
        tf.image.resize_images(decoded_image, [height, width]), tf.uint8)
    # Re-encode the resized image; running a Session per example is slow,
    # but matches the TF1 style of the rest of the snippet.
    encoded_image_data = tf.image.encode_jpeg(decoded_image_resized)
    encoded_image_data = tf.Session().run(encoded_image_data)
    image_format = b'jpeg'

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    for i in range(len(example['boxes'])):
        xmins.append(float(example['boxes'][i][0]))
        xmaxs.append(float(example['boxes'][i][2]))
        ymins.append(float(example['boxes'][i][1]))
        ymaxs.append(float(example['boxes'][i][3]))

    classes_text = list()
    classes = list()
    for i in range(len(example['boxes'])):
        # Class names must be bytes for bytes_list_feature.
        if example['class'] == 21:
            classes.append(21)
            classes_text.append('/m/01xq0k1'.encode('utf8'))
        elif example['class'] == 19:
            classes.append(19)
            classes_text.append('/m/03k3r'.encode('utf8'))
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_record(output_filename, num_shards, examples):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack,
            output_filename,
            num_shards)
        for idx, example in enumerate(examples):
            img_path = os.path.join(read_bucket, example)
            if not os.path.isfile(img_path):
                continue
            with tf.gfile.GFile(img_path, 'rb') as fid:
                encoded_jpg = fid.read()
            encoded_jpg_io = io.BytesIO(encoded_jpg)
            image = PIL.Image.open(encoded_jpg_io)
            if image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            key = hashlib.sha256(encoded_jpg).hexdigest()

            width, height = image.size

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []  # 'coke', 'pepsi', 'coke'...
            classes = []  # 1, 2, 1...
            difficult_obj = []
            truncated = []
            poses = []

            for annotation in annotations[example]:
                xmins.append(annotation['x'])
                xmaxs.append(annotation['x2'])
                ymins.append(annotation['y'])
                ymaxs.append(annotation['y2'])
                classes_text.append(annotation['label'].encode('utf8'))
                classes.append(1)  # temporary, I need to assign labels to actual ids
                difficult_obj.append(0)
                truncated.append(0)
                poses.append(''.encode('utf8'))

            try:
                feature_dict = {
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/source_id': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label': dataset_util.int64_list_feature(classes),
                    'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                    'image/object/truncated': dataset_util.int64_list_feature(truncated),
                    'image/object/view': dataset_util.bytes_list_feature(poses)
                }
                tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                print('Invalid example, ignoring.')
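This variant relies on module-level read_bucket and annotations globals. A sketch of how it might be invoked (the paths and shard count are assumptions):

import os

examples = sorted(os.listdir(read_bucket))  # image filenames keyed into `annotations`
create_tf_record('data/train.record', num_shards=10, examples=examples)
# Writes data/train.record-00000-of-00010 ... data/train.record-00009-of-00010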
Example no. 48
0
def create_tf_example(image):
    # Bosch
    height = 960  # Image height
    width = 1280  # Image width

    filename = image['filename']

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image = fid.read()

    # Encode the path only after the file has been read.
    filename = filename.encode()
    image_format = 'jpg'.encode()

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    for sign in image['signs']:
        xmins.append(float(sign['xmin'] / width))
        xmaxs.append(float(sign['xmax'] / width))
        ymins.append(float(sign['ymin'] / height))
        ymaxs.append(float(sign['ymax'] / height))
        classes_text.append(sign['name'].encode())
        classes.append(LABELS.index(sign['name']) + 1)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
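The LABELS list is assumed to be defined at module level; ids are offset by one because id 0 is reserved for the background class in the Object Detection API, hence the `+ 1`. For the Bosch traffic-light data it might look like:

# Illustrative only; the real Bosch label set also includes directional
# variants such as 'RedLeft' and 'GreenRight'.
LABELS = ['Red', 'Yellow', 'Green', 'off']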
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    img_path = os.path.join('VOC2007', image_subdirectory,
                            data['filename'] + '.jpg')
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Example no. 50
0
def create_tf_example(file_name,
                      masks,
                      class_names,
                      label_map_dict,
                      image_directory,
                      image_size=(768, 768)):

    height = image_size[0]
    width = image_size[1]
    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    encoded_masks = []

    # Read image
    img_path = os.path.join(image_directory, file_name)

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Look up class id
    class_ids = [label_map_dict[class_name] for class_name in class_names]

    # Encode class names into bytes
    class_names = [name.encode('utf8') for name in class_names]

    # Encode mask into png and get bounding box coordinates
    for mask in masks:
        mask_array = convert_mask_rle_to_img_array(mask)
        encoded_mask = convert_img_array_to_png_str(mask_array)
        encoded_masks.append(encoded_mask)

        xmin, xmax, ymin, ymax = get_bbox_coordinates(mask_array)

        xmins.append(xmin / width)
        xmaxs.append(xmax / width)
        ymins.append(ymin / height)
        ymaxs.append(ymax / height)

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(file_name.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(file_name.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(class_names),
        'image/object/class/label': dataset_util.int64_list_feature(class_ids),
        'image/object/mask': dataset_util.bytes_list_feature(encoded_masks),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))

    return example
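convert_mask_rle_to_img_array and convert_img_array_to_png_str are not shown; plausible versions, assuming the column-major run-length encoding used by the Kaggle Airbus ship data (which matches the 768x768 default above):

import io
import numpy as np
import PIL.Image

def convert_mask_rle_to_img_array(rle, shape=(768, 768)):
    # Decode 'start length start length ...' pairs; starts are 1-indexed
    # offsets into the image flattened in column-major (Fortran) order.
    s = rle.split()
    starts = np.asarray(s[0::2], dtype=int) - 1
    lengths = np.asarray(s[1::2], dtype=int)
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for start, length in zip(starts, lengths):
        img[start:start + length] = 1
    return img.reshape(shape).T

def convert_img_array_to_png_str(img_array):
    # PNG-encode a binary mask array into bytes.
    output = io.BytesIO()
    PIL.Image.fromarray(img_array).save(output, format='PNG')
    return output.getvalue()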