def testDecodeEmptyPngInstanceMasks(self):
    image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    encoded_masks = []
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/mask':
                    dataset_util.bytes_list_feature(encoded_masks),
                'image/height':
                    dataset_util.int64_feature(10),
                'image/width':
                    dataset_util.int64_feature(10),
            })).SerializeToString()

    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)
      self.assertAllEqual(
          tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape,
          [0, 10, 10])
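
# The tests above call an _EncodeImage helper on the test class that is not
# shown here. A minimal sketch of what it plausibly looks like (the
# encoding_type parameter and the PNG branch are assumptions):
def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
    with self.test_session():
        if encoding_type == 'jpeg':
            return tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
        elif encoding_type == 'png':
            return tf.image.encode_png(tf.constant(image_tensor)).eval()
        raise ValueError('Invalid encoding type.')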
def create_tf_example(example, writer):
  height = example['height']
  width = example['width']
  filename = example['filename']
  encoded_image_data = example['encoded_image_data']
  image_format = example['image_format']

  bboxes = example['bbox']
  xmins = [bbox[0]/float(width) for bbox in bboxes] # List of normalized left x coordinates in bounding box (1 per box)
  xmaxs = [bbox[2]/float(width) for bbox in bboxes] # List of normalized right x coordinates in bounding box
  ymins = [bbox[1]/float(height) for bbox in bboxes] # List of normalized top y coordinates in bounding box (1 per box)
  ymaxs = [bbox[3]/float(height) for bbox in bboxes] # List of normalized bottom y coordinates in bounding box
  classes_text = example['class_text']
  classes = example['class_idx']

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
      'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature([bytes(t, "utf-8") for t in classes_text]),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  writer.write(tf_example.SerializeToString())
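
# A possible driver for the writer-based converter above; the example dicts and
# the output path are hypothetical, and tf.python_io.TFRecordWriter matches the
# TF1 API used throughout these snippets.
def write_examples(examples, output_path):
  writer = tf.python_io.TFRecordWriter(output_path)
  for example in examples:
    create_tf_example(example, writer)
  writer.close()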
def create_tf_example(row):
    full_path = os.path.join(os.getcwd(), 'images', '{}'.format(row['filename']))
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = row['filename'].encode('utf8')
    image_format = b'jpg'
    xmins = [row['xmin'] / width]
    xmaxs = [row['xmax'] / width]
    ymins = [row['ymin'] / height]
    ymaxs = [row['ymax'] / height]
    classes_text = [row['class'].encode('utf8')]
    classes = [class_text_to_int(row['class'])]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
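
# class_text_to_int is referenced above but not shown; one plausible minimal
# implementation, with placeholder label names (ids are 1-based, since the
# Object Detection API reserves 0 for background):
def class_text_to_int(class_text):
    labels = ['cat', 'dog']  # hypothetical class names
    return labels.index(class_text) + 1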
  def testDecodeInstanceSegmentation(self):
    num_instances = 4
    image_height = 5
    image_width = 3

    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)

    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(2, size=(num_instances, image_height,
                                   image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
                    dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

    self.assertAllEqual(
        (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
         .get_shape().as_list()), [4, 5, 3])

    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
                         .get_shape().as_list()), [4])

    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)

    self.assertAllEqual(
        instance_masks.astype(np.float32),
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
    self.assertAllEqual(object_classes,
                        tensor_dict[fields.InputDataFields.groundtruth_classes])
def dict_to_tf_example(data,
                       label_map_dict,
                       data_dir):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    """
  
    img_path = os.path.join(data_dir, data.replace("mask", "images"))
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = 512
    height = 512

    classes = []
    classes_text = []
    encoded_mask_png_list = []
    mask_np = (cv2.imread(os.path.join(data_dir, data), 0) / 255).astype('uint8')
    mask_img = PIL.Image.fromarray(mask_np)
    output = io.BytesIO()
    mask_img.save(output, format='PNG')
    encoded_mask_png_list.append(output.getvalue())
    class_name = 'water'
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])


    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            data.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }

    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
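
# A possible driver for the converter above; the directory layout (masks under
# <data_dir>/mask with matching files under <data_dir>/images, implied by the
# replace("mask", "images") call) and the label map path are assumptions.
import glob
from object_detection.utils import label_map_util

def write_water_examples(data_dir, label_map_path, output_path):
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    writer = tf.python_io.TFRecordWriter(output_path)
    for mask_file in glob.glob(os.path.join(data_dir, 'mask', '*.jpg')):
        data = os.path.join('mask', os.path.basename(mask_file))
        example = dict_to_tf_example(data, label_map_dict, data_dir)
        writer.write(example.SerializeToString())
    writer.close()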
def createTfExample(singleFileData, path):
    # use TensorFlow's GFile function to open the .jpg image matching the current box data
    with tf.gfile.GFile(os.path.join(path, '{}'.format(singleFileData.filename)), 'rb') as tensorFlowImageFile:
        tensorFlowImage = tensorFlowImageFile.read()
    # end with

    # get the image width and height via converting from a TensorFlow image to an io library BytesIO image,
    # then to a PIL Image, then breaking out the width and height
    bytesIoImage = io.BytesIO(tensorFlowImage)
    pilImage = Image.open(bytesIoImage)
    width, height = pilImage.size

    # get the file name from the file data passed in, and set the image format to .jpg
    fileName = singleFileData.filename.encode('utf8')
    imageFormat = b'jpg'

    # declare empty lists for the box x, y, mins and maxes, and the class as text and as an integer
    xMins = []
    xMaxs = []
    yMins = []
    yMaxs = []
    classesAsText = []
    classesAsInts = []

    # for each row in the current .xml file's data . . . (each row in the .xml file corresponds to one box)
    for index, row in singleFileData.object.iterrows():
        xMins.append(row['xmin'] / width)
        xMaxs.append(row['xmax'] / width)
        yMins.append(row['ymin'] / height)
        yMaxs.append(row['ymax'] / height)
        classesAsText.append(row['class'].encode('utf8'))
        classesAsInts.append(classAsTextToClassAsInt(row['class']))
    # end for

    # finally we can calculate and return the TensorFlow Example
    tfExample = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fileName),
        'image/source_id': dataset_util.bytes_feature(fileName),
        'image/encoded': dataset_util.bytes_feature(tensorFlowImage),
        'image/format': dataset_util.bytes_feature(imageFormat),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xMins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xMaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(yMins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(yMaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classesAsText),
        'image/object/class/label': dataset_util.int64_list_feature(classesAsInts)}))

    return tfExample
def create_tf_example(filename, writer):
    with open(filename) as f:
        lines = f.readlines()
    image_filename = lines[0].strip()[1:]
    classes_text = []
    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    im = Image.open(image_filename)
    arr = io.BytesIO()
    im.save(arr, format='PNG')
    height = im.height
    width = im.width
    encoded_image_data = arr.getvalue()
    image_format = 'png'
    for line in lines[1:]:
        line = line.strip()
        if line == '':
            continue
        data = line.split(",")
        bbox = list(map(int, map(float, data[:4])))
        class_text = data[4].strip()
        class_idx = labels.index(class_text)
        classes_text.append(class_text)
        classes.append(class_idx)
        xmins.append(bbox[0] / float(width))    # normalized left x coordinate
        xmaxs.append(bbox[2] / float(width))    # normalized right x coordinate
        ymins.append(bbox[1] / float(height))   # normalized top y coordinate
        ymaxs.append(bbox[3] / float(height))   # normalized bottom y coordinate


    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature([bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
  def testInstancesNotAvailableByDefault(self):
    num_instances = 4
    image_height = 5
    image_width = 3
    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)

    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(2, size=(num_instances, image_height,
                                   image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
                    dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
                     tensor_dict)
def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
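
# This converter is usually paired with a split() helper that groups a pandas
# DataFrame of CSV annotations by filename before calling create_tf_example;
# a sketch under that assumption:
from collections import namedtuple

def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x))
            for filename, x in zip(gb.groups.keys(), gb.groups)]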
  def create_tf_record(self):
    path = os.path.join(self.get_temp_dir(), 'tfrecord')
    writer = tf.python_io.TFRecordWriter(path)

    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    flat_mask = (4 * 5) * [1.0]
    with self.test_session():
      encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/height': dataset_util.int64_feature(4),
        'image/width': dataset_util.int64_feature(5),
        'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
        'image/object/class/label': dataset_util.int64_list_feature([2]),
        'image/object/mask': dataset_util.float_list_feature(flat_mask),
    }))
    writer.write(example.SerializeToString())
    writer.close()

    return path
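
# A quick sanity check for the record written above, using TF1's
# tf_record_iterator; the path comes from create_tf_record():
def read_back(path):
  for serialized in tf.python_io.tf_record_iterator(path):
    example = tf.train.Example.FromString(serialized)
    print(example.features.feature['image/height'].int64_list.value)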
def prepare_example(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

  Args:
    image_path: The complete path to image.
    annotations: A dictionary representing the annotation of a single object
      that appears in the image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    image = np.asarray(image)

    key = hashlib.sha256(encoded_png).hexdigest()

    width = int(image.shape[1])
    height = int(image.shape[0])

    xmin_norm = annotations['2d_bbox_left'] / float(width)
    ymin_norm = annotations['2d_bbox_top'] / float(height)
    xmax_norm = annotations['2d_bbox_right'] / float(width)
    ymax_norm = annotations['2d_bbox_bottom'] / float(height)

    difficult_obj = [0] * len(xmin_norm)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_png),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin_norm),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax_norm),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin_norm),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax_norm),
            'image/object/class/text':
            dataset_util.bytes_list_feature(
                [x.encode('utf8') for x in annotations['type']]),
            'image/object/class/label':
            dataset_util.int64_list_feature(
                [label_map_dict[x] for x in annotations['type']]),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.float_list_feature(annotations['truncated']),
            'image/object/alpha':
            dataset_util.float_list_feature(annotations['alpha']),
            'image/object/3d_bbox/height':
            dataset_util.float_list_feature(annotations['3d_bbox_height']),
            'image/object/3d_bbox/width':
            dataset_util.float_list_feature(annotations['3d_bbox_width']),
            'image/object/3d_bbox/length':
            dataset_util.float_list_feature(annotations['3d_bbox_length']),
            'image/object/3d_bbox/x':
            dataset_util.float_list_feature(annotations['3d_bbox_x']),
            'image/object/3d_bbox/y':
            dataset_util.float_list_feature(annotations['3d_bbox_y']),
            'image/object/3d_bbox/z':
            dataset_util.float_list_feature(annotations['3d_bbox_z']),
            'image/object/3d_bbox/rot_y':
            dataset_util.float_list_feature(annotations['3d_bbox_rot_y']),
        }))

    return example
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    for obj in data['object']:
        difficult_obj.append(int(0))

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)

        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(int(0))
        poses.append('Unspecified'.encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
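
# The label_map_dict argument above typically comes from the Object Detection
# API's label map utilities; a minimal sketch (the .pbtxt path is a
# placeholder):
from object_detection.utils import label_map_util

label_map_dict = label_map_util.get_label_map_dict('data/pascal_label_map.pbtxt')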
def prepare_example(image_path, annotations, label_map_dict):
    """
    Converts a dictionary with annotations for an image to tf.Example proto.
    :param image_path: full path to the image
    :param annotations: a list object obtained by reading the annotation csv file
    :param label_map_dict: a map from string label names to integer ids.
    :return: example: The converted tf.Example.
    """
    print("encoding %s" % image_path)
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)

    if image.format != 'PNG':
        raise ValueError('Image format error')

    key = hashlib.sha256(encoded_png).hexdigest()
    # obtain attributes
    width, height = image.size
    img_filename = image_path.split('/')[-1]

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    occlud = []

    xmin.append(int(annotations[2]) / width)
    ymin.append(int(annotations[3]) / height)
    xmax.append(int(annotations[4]) / width)
    ymax.append(int(annotations[5]) / height)
    class_name = annotations[1]
    classes_text.append(class_name)
    classes.append(label_map_dict[class_name])
    classes_text = [class_text.encode('utf-8') for class_text in classes_text]
    trun, occ = annotations[6].split(',')
    truncated.append(int(trun))
    occlud.append(int(occ))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(img_filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(img_filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_png),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.int64_list_feature(occlud),
        }))
    return example
def dict_to_tf_example(filename,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    filename: Image filename (without extension) inside image_subdirectory.
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying the subdirectory holding the actual
      image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by filename is not a valid JPEG.
  """
    img_path = os.path.join(image_subdirectory, filename + '.jpg')
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    mask_cv = cv2.imread(mask_path)
    height, width, channels = mask_cv.shape
    '''
  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_mask_png)
  mask = PIL.Image.open(encoded_png_io)

  # print("mask pixel", mask[400][200])
  if mask.format != 'PNG':
    raise ValueError('Mask format not PNG')

  print("img_path:", img_path)
  print("mask_path:", mask_path)  
  mask_np = np.asarray(mask)

  width = int(data['size']['width'])
  height = int(data['size']['height'])
  '''

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # masks = []

    print('-filename', filename)
    bndboxes, masks, buttons_list = analize_mask(mask_cv)
    print(
        '--bndboxes length is {}, masks length is {} and buttons_list length is {}'
        .format(len(bndboxes), len(masks), len(buttons_list)))

    for index, bndbox in enumerate(bndboxes):
        xmin, xmax, ymin, ymax = bndbox

        difficult = bool(0)
        if ignore_difficult_instances and difficult:
            continue
        difficult_obj.append(int(difficult))

        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)

        # classes_text.append(obj['name'].encode('utf8'))
        # classes.append(label_map_dict[obj['name']])
        # print("label_map_dict[obj['name']]", label_map_dict[obj['name']])

        class_name = buttons_list[index]
        print('---box ' + str(index + 1) + ' is class_name ' + class_name +
              ' with label_map_dict[class_name] # ' +
              str(label_map_dict[class_name]) + ' xmin: ' + str(xmin) +
              ', ymin: ' + str(ymin) + ', xmax: ' + str(xmax) + ', ymax: ' +
              str(ymax))
        count_element(class_name)
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

        truncated.append(0)
        poses.append('Unspecified'.encode('utf8'))

        #  mask_remapped = (mask_np != 2).astype(np.uint8)
        #  masks.append(mask_remapped)

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }

    if mask_type == 'png':
        encoded_mask_png_list = []
        for mask in masks:
            img = PIL.Image.fromarray(mask)
            output = io.BytesIO()
            img.save(output, format='PNG')
            encoded_mask_png_list.append(output.getvalue())
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
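
# analize_mask() is not shown in this snippet. A rough sketch of what such a
# helper might do, using OpenCV connected components; the real implementation,
# and how the buttons_list class names are derived, are assumptions:
import cv2
import numpy as np

def analize_mask(mask_cv):
    gray = cv2.cvtColor(mask_cv, cv2.COLOR_BGR2GRAY)
    binary = (gray > 0).astype(np.uint8)
    num, labels, stats, _ = cv2.connectedComponentsWithStats(binary)
    bndboxes, masks, buttons_list = [], [], []
    for i in range(1, num):  # label 0 is the background component
        x, y, w, h, _ = stats[i]
        bndboxes.append((x, x + w, y, y + h))  # (xmin, xmax, ymin, ymax)
        masks.append((labels == i).astype(np.uint8))
        buttons_list.append('button')  # placeholder class name per region
    return bndboxes, masks, buttons_list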
def _create_tfexample(label_map_dict, image_id, encoded_image,
                      encoded_next_image, depth, next_depth, flow,
                      segmentation, extrinsics_dict, next_extrinsics_dict,
                      tracking_rows, next_tracking_rows,
                      segmentation_color_map, first_extrinsics_dict):
    frame_id = int(image_id.split('_')[1])
    assert frame_id == extrinsics_dict[
        'frame'] == next_extrinsics_dict['frame'] - 1
    next_tracking_row_map = {row['tid']: row for row in next_tracking_rows}
    height, width = depth.shape[:2]

    extrinsics = np.reshape(
        np.array(list(extrinsics_dict.values())[1:], dtype=np.float32), [4, 4])
    next_extrinsics = np.reshape(
        np.array(list(next_extrinsics_dict.values())[1:], dtype=np.float32),
        [4, 4])
    first_extrinsics = np.reshape(
        np.array(list(first_extrinsics_dict.values())[1:], dtype=np.float32),
        [4, 4])
    camera_moving = not np.allclose(extrinsics, next_extrinsics)
    q_cam1 = q_from_rotation_matrix(extrinsics[:3, :3])
    q_cam2 = q_from_rotation_matrix(next_extrinsics[:3, :3])
    trans_cam1 = extrinsics[:3, 3]
    trans_cam2 = next_extrinsics[:3, 3]
    q_cam1_to_cam2 = q_difference(q_cam1, q_cam2)
    q_cam2_to_cam1 = q_conjugate(q_cam1_to_cam2)
    trans_cam1_to_cam2 = trans_cam2 - q_rotate(q_cam1_to_cam2, trans_cam1)
    trans_cam2_to_cam1 = trans_cam1 - q_rotate(q_cam2_to_cam1, trans_cam2)
    print(q_cam1_to_cam2)
    camera_motion = np.concatenate([
        q_cam1_to_cam2, trans_cam1_to_cam2,
        np.array([camera_moving], dtype=np.float32)
    ])

    boxes = []
    masks = []
    classes = []
    motions = []
    diff = 0
    for row in tracking_rows:
        next_row = next_tracking_row_map.get(row['tid'])
        label = row['orig_label']
        tid = row['tid']
        # ensure object still tracked in next frame and visible in original frame
        if next_row is not None and row['occupr'] > 0.1:
            assert frame_id == row['frame'] == next_row['frame'] - 1
            box = np.array([row['t'], row['l'], row['b'], row['r']],
                           dtype=np.float64)
            boxes.append(box)
            class_id = label_map_dict[label.lower()]
            classes.append(class_id)
            # find out which color this object corresponds to in the segmentation image
            seg_r, seg_g, seg_b = segmentation_color_map['{}:{}'.format(
                label, tid)]
            mask = ((segmentation[:, :, 0] == seg_r).astype(np.uint8) +
                    (segmentation[:, :, 1] == seg_g).astype(np.uint8) +
                    (segmentation[:, :, 2] == seg_b).astype(np.uint8))
            mask = (mask == 3).astype(np.uint8)
            masks.append(mask)
            moving = int(row['moving'])
            p1 = _get_pivot(row)
            p2 = _get_pivot(next_row)
            q1 = _get_q(row)
            q2 = _get_q(next_row)
            q = q_multiply(q2, q_multiply(q_cam1_to_cam2, q_conjugate(q1)))
            p2_cam1 = q_rotate(q_cam2_to_cam1, p2) + trans_cam2_to_cam1
            trans = p2_cam1 - q_rotate(q, p1)
            if moving == 0:
                q = np.array([1, 0, 0, 0], dtype=np.float32)
                trans = np.array([0, 0, 0], dtype=np.float32)
            mv = np.array([moving], dtype=np.float32)
            motion = np.concatenate([q, trans, p1, mv])
            diff += np.sum(
                np.abs(
                    q_rotate(q_cam1_to_cam2,
                             q_rotate(q, p1) + trans) + trans_cam1_to_cam2 -
                    p2))
            motions.append(motion)
    print(diff)
    if len(boxes) > 0:
        boxes = np.stack(boxes, axis=0)
        masks = np.stack(masks, axis=0)
        motions = np.stack(motions, axis=0)
    else:
        boxes = np.zeros((0, 4), dtype=np.float32)
        masks = np.zeros((0, height, width), dtype=np.float32)
        motions = np.zeros((0, 15), dtype=np.float32)

    num_instances = boxes.shape[0]

    ymins = (boxes[:, 0] / height).tolist()
    xmins = (boxes[:, 1] / width).tolist()
    ymaxs = (boxes[:, 2] / height).tolist()
    xmaxs = (boxes[:, 3] / width).tolist()
    index_0, index_1, index_2 = np.nonzero(masks)
    key = hashlib.sha256(encoded_image).hexdigest()

    camera_intrinsics = np.array([725.0, 620.5, 187.0], dtype=np.float32)

    if FLAGS.gt_rigid_flow_from_motion:
        example_flow = dense_flow_from_motion(np.expand_dims(depth, 2),
                                              motions, masks, camera_motion,
                                              camera_intrinsics)
    else:
        example_flow = flow

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(image_id.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(image_id.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'next_image/encoded':
            dataset_util.bytes_feature(encoded_next_image),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/motion':
            dataset_util.float_list_feature(motions.ravel().tolist()),
            'image/segmentation/object/count':
            dataset_util.int64_feature(num_instances),
            'image/segmentation/object/index_0':
            dataset_util.int64_list_feature(index_0.tolist()),
            'image/segmentation/object/index_1':
            dataset_util.int64_list_feature(index_1.tolist()),
            'image/segmentation/object/index_2':
            dataset_util.int64_list_feature(index_2.tolist()),
            'image/segmentation/object/class':
            dataset_util.int64_list_feature(classes),
            'image/depth':
            dataset_util.float_list_feature(depth.ravel().tolist()),
            'next_image/depth':
            dataset_util.float_list_feature(next_depth.ravel().tolist()),
            'image/flow':
            dataset_util.float_list_feature(example_flow.ravel().tolist()),
            'image/camera/motion':
            dataset_util.float_list_feature(camera_motion.tolist()),
            'image/camera/intrinsics':
            dataset_util.float_list_feature(camera_intrinsics.tolist())
        }))
    return example, num_instances
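
# The quaternion helpers used above (q_multiply, q_conjugate, q_rotate,
# q_difference) are not shown; these are plausible [w, x, y, z]-convention
# implementations, given as a sketch rather than the original code
# (q_from_rotation_matrix is omitted here):
import numpy as np

def q_conjugate(q):
    w, x, y, z = q
    return np.array([w, -x, -y, -z], dtype=q.dtype)

def q_multiply(a, b):
    w1, x1, y1, z1 = a
    w2, x2, y2, z2 = b
    return np.array([
        w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
        w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
        w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2,
        w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2], dtype=a.dtype)

def q_rotate(q, v):
    # rotate a 3-vector v by the unit quaternion q
    qv = np.concatenate([[0.0], v]).astype(q.dtype)
    return q_multiply(q_multiply(q, qv), q_conjugate(q))[1:]

def q_difference(q1, q2):
    # rotation taking frame 1 to frame 2: q2 * conj(q1)
    return q_multiply(q2, q_conjugate(q1))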
def dict_to_tf_example(labels_corners, labels_center, labels_data, params,
                       label_map_dict, image_dir, image_dir_beliefs,
                       image_prefix, image_prev_prefix):
    width = round(params['pointcloud_grid_map_interface']['grids']['cartesian']
                  ['range']['y'] / params['pointcloud_grid_map_interface']
                  ['grids']['cartesian']['resolution']['y'])
    height = round(
        params['pointcloud_grid_map_interface']['grids']['cartesian']['range']
        ['x'] / params['pointcloud_grid_map_interface']['grids']['cartesian']
        ['resolution']['x'])
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    x_c = []
    y_c = []
    w = []
    h = []
    angle = []
    sin_angle = []
    cos_angle = []
    classes = []
    classes_text = []

    for idx, label_corner in enumerate(labels_corners):
        xmin.append(min(label_corner[0]) / width)
        ymin.append(min(label_corner[1]) / height)
        xmax.append(max(label_corner[0]) / width)
        ymax.append(max(label_corner[1]) / height)
        x_min = min(label_corner[0]) / width
        y_min = min(label_corner[1]) / height
        x_max = max(label_corner[0]) / width
        y_max = max(label_corner[1]) / height
        if (x_min >= 1) or (y_min >= 1) or (x_max >= 1) or (y_max >= 1):
            print(x_min, y_min, x_max, y_max)
            raise ValueError('Box parameters greater than 1.0')
        if (x_min <= 0) or (y_min <= 0) or (x_max <= 0) or (y_max <= 0):
            raise ValueError('Box parameters less than 0.0')
        x_c.append(labels_center[idx][0])
        y_c.append(labels_center[idx][1])
        angle_rad = _flipAngle(labels_data[idx].rz)
        angle.append(angle_rad)
        sin_angle.append(math.sin(2 * angle_rad))
        cos_angle.append(math.cos(2 * angle_rad))
        vec_s_x = math.cos(angle_rad)
        vec_s_y = math.sin(angle_rad)

        w_p = labels_data[idx].w / params['pointcloud_grid_map_interface'][
            'grids']['cartesian']['resolution']['y']
        w_p_s = w_p * math.sqrt(vec_s_x * vec_s_x /
                                (width * width) + vec_s_y * vec_s_y /
                                (height * height))
        w.append(w_p_s)

        l_p = labels_data[idx].l / params['pointcloud_grid_map_interface'][
            'grids']['cartesian']['resolution']['x']
        l_p_s = l_p * math.sqrt(vec_s_x * vec_s_x /
                                (height * height) + vec_s_y * vec_s_y /
                                (width * width))
        h.append(l_p_s)

        class_name = labels_data[idx].type
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    return tf.train.Example(features=tf.train.Features(
        feature={
            'id':
            dataset_util.bytes_feature(image_prefix.encode('utf8')),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'layers/height':
            dataset_util.int64_feature(height),
            'layers/width':
            dataset_util.int64_feature(width),
            'layers/detections/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix, 'detections_cartesian')),
            'layers/observations/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix, 'observations_cartesian')),
            'layers/decay_rate/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix, 'decay_rate_cartesian')),
            'layers/intensity/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix, 'intensity_cartesian')),
            'layers/zmin/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix,
                           'z_min_detections_cartesian')),
            'layers/zmax/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix,
                           'z_max_detections_cartesian')),
            'layers/occlusions/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir, image_prefix,
                           'z_max_occlusions_cartesian')),
            'layers/bel_O_FUSED/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir_beliefs, image_prefix,
                           'bel_O_FUSED_cartesian')),
            'layers/bel_F_FUSED/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir_beliefs, image_prefix,
                           'bel_F_FUSED_cartesian')),
            'layers/bel_U_FUSED/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir_beliefs, image_prefix,
                           'bel_U_FUSED_cartesian')),
            'layers/detections_drivingCorridor_FUSED/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir_beliefs, image_prefix,
                           'detections_drivingCorridor_FUSED_cartesian')),
            'layers/z_max_detections_FUSED/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir_beliefs, image_prefix,
                           'z_max_detections_FUSED_cartesian')),
            'layers/z_min_detections_FUSED/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir_beliefs, image_prefix,
                           'z_min_detections_FUSED_cartesian')),
            'layers/observations_z_min_FUSED/encoded':
            dataset_util.bytes_feature(
                _readImage(image_dir_beliefs, image_prefix,
                           'observations_z_min_FUSED_cartesian')),

            # 'layers_prev/detections/encoded': dataset_util.bytes_feature(
            #     _readImage(image_dir, image_prev_prefix, 'detections_cartesian')),
            # 'layers_prev/observations/encoded': dataset_util.bytes_feature(
            #     _readImage(image_dir, image_prev_prefix, 'observations_cartesian')),
            # 'layers_prev/decay_rate/encoded': dataset_util.bytes_feature(
            #     _readImage(image_dir, image_prev_prefix, 'decay_rate_cartesian')),
            # 'layers_prev/intensity/encoded': dataset_util.bytes_feature(
            #     _readImage(image_dir, image_prev_prefix, 'intensity_cartesian')),
            # 'layers_prev/zmin/encoded': dataset_util.bytes_feature(
            #     _readImage(image_dir, image_prev_prefix, 'z_min_detections_cartesian')),
            # 'layers_prev/zmax/encoded': dataset_util.bytes_feature(
            #     _readImage(image_dir, image_prev_prefix, 'z_max_detections_cartesian')),
            # 'layers_prev/occlusions/encoded': dataset_util.bytes_feature(
            #     _readImage(image_dir, image_prev_prefix, 'z_max_occlusions_cartesian')),
            'boxes/aligned/x_min':
            dataset_util.float_list_feature(xmin),
            'boxes/aligned/x_max':
            dataset_util.float_list_feature(xmax),
            'boxes/aligned/y_min':
            dataset_util.float_list_feature(ymin),
            'boxes/aligned/y_max':
            dataset_util.float_list_feature(ymax),
            'boxes/inclined/x_c':
            dataset_util.float_list_feature(x_c),
            'boxes/inclined/y_c':
            dataset_util.float_list_feature(y_c),
            'boxes/inclined/w':
            dataset_util.float_list_feature(w),
            'boxes/inclined/h':
            dataset_util.float_list_feature(h),
            'boxes/inclined/angle':
            dataset_util.float_list_feature(angle),
            'boxes/inclined/sin_angle':
            dataset_util.float_list_feature(sin_angle),
            'boxes/inclined/cos_angle':
            dataset_util.float_list_feature(cos_angle),
            'boxes/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'boxes/class/label':
            dataset_util.int64_list_feature(classes),
        }))
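
# _readImage is assumed to return the raw PNG bytes of a single grid-map layer;
# a minimal sketch (the filename pattern is a guess):
def _readImage(image_dir, image_prefix, layer_name):
    path = os.path.join(image_dir, '{}_{}.png'.format(image_prefix, layer_name))
    with tf.gfile.GFile(path, 'rb') as fid:
        return fid.read()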
def create_tf_example(group, path):
    with tf.gfile.GFile(
            os.path.join(path, '{}'.format(group.filename.split("/")[-1])),
            'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        if "go" in row['Annotation tag']:
            lightColor = "Green"
        elif "stop" in row['Annotation tag']:
            lightColor = "Red"
        elif "warning" in row['Annotation tag']:
            lightColor = "Yellow"
        else:
            continue  # skip annotation tags with no matching light color
        xmins.append(row['Upper left corner X'] / width)
        xmaxs.append(row['Lower right corner X'] / width)
        ymins.append(row['Upper left corner Y'] / height)
        ymaxs.append(row['Lower right corner Y'] / height)
        classes_text.append(lightColor.encode('utf8'))
        classes.append(int(LABEL_DICT[lightColor]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
                    embed()  # IPython debugging hook (requires `from IPython import embed`)
                xmins.append(xmin / width)
                xmaxs.append(xmax / width)
                ymins.append(ymin / height)
                ymaxs.append(ymax / height)
                classes_text.append(tf.compat.as_bytes(text))
                classes.append(index)
                # cv2.rectangle(_img, (xmin, ymin), (xmax, ymax), (255,0,0), 2)
                # cv2.putText(_img, text, (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), lineType=cv2.LINE_AA)
        # _path = os.path.join(out_path, os.path.splitext(data[key]["filename"])[0]+"_ann"+os.path.splitext(data[key]["filename"])[1])
        # cv2.imwrite(_path, _img)

        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(filename),
                'image/source_id':
                dataset_util.bytes_feature(filename),
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpg),
                'image/format':
                dataset_util.bytes_feature(image_format),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            }))
    def create_tf_record(self,
                         has_additional_channels=False,
                         num_shards=1,
                         num_examples_per_shard=1):
        def dummy_jpeg_fn():
            image_tensor = np.random.randint(255,
                                             size=(4, 5, 3)).astype(np.uint8)
            additional_channels_tensor = np.random.randint(
                255, size=(4, 5, 1)).astype(np.uint8)
            encoded_jpeg = tf.image.encode_jpeg(image_tensor)
            encoded_additional_channels_jpeg = tf.image.encode_jpeg(
                additional_channels_tensor)

            return encoded_jpeg, encoded_additional_channels_jpeg

        encoded_jpeg, encoded_additional_channels_jpeg = self.execute(
            dummy_jpeg_fn, [])

        tmp_dir = self.get_temp_dir()
        flat_mask = (4 * 5) * [1.0]

        for i in range(num_shards):
            path = os.path.join(tmp_dir, '%05d.tfrecord' % i)
            writer = tf.python_io.TFRecordWriter(path)

            for j in range(num_examples_per_shard):
                if num_shards > 1:
                    source_id = (str(i) + '_' + str(j)).encode()
                else:
                    source_id = str(j).encode()

                features = {
                    'image/source_id':
                    dataset_util.bytes_feature(source_id),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                    'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/height':
                    dataset_util.int64_feature(4),
                    'image/width':
                    dataset_util.int64_feature(5),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature([0.0]),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature([1.0]),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature([0.0]),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature([1.0]),
                    'image/object/class/label':
                    dataset_util.int64_list_feature([2]),
                    'image/object/mask':
                    dataset_util.float_list_feature(flat_mask),
                }

                if has_additional_channels:
                    additional_channels_key = 'image/additional_channels/encoded'
                    features[
                        additional_channels_key] = dataset_util.bytes_list_feature(
                            [encoded_additional_channels_jpeg] * 2)

                example = tf.train.Example(features=tf.train.Features(
                    feature=features))
                writer.write(example.SerializeToString())

            writer.close()

        return os.path.join(self.get_temp_dir(), '?????.tfrecord')
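
# The glob pattern returned above can be consumed directly in the same TF1
# style; a brief sketch:
def make_dataset(file_pattern):
    filenames = tf.gfile.Glob(file_pattern)
    return tf.data.TFRecordDataset(filenames)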
def create_tf_example(example, mode):
    # Some images referenced in the xml aren't in the dataset
    try:
        activ_image = Image.open(join(example['path_to_image'],
                                      example['file_name']),
                                 mode='r')
    except IOError:
        print("Could not find {0}; skipping".format(
            join(example['path_to_image'], example['file_name'])))
        return None

    if USE_GRAYSCALE:
        activ_image = activ_image.convert('L').convert('RGB')

    # Normalized x,y coordinates
    width, height = activ_image.size
    xmins = [x / float(width) for x in example['bbox_xmins']]
    xmaxs = [x / float(width) for x in example['bbox_xmaxs']]
    ymins = [y / float(height) for y in example['bbox_ymins']]
    ymaxs = [y / float(height) for y in example['bbox_ymaxs']]

    # Skip the image if it doesn't match INPUT_WIDTH x INPUT_HEIGHT
    if mode != "test" and ONE_IMAGE_SIZE and (height != INPUT_HEIGHT
                                              or width != INPUT_WIDTH):
        #print("Input image does not match expected size {0}x{1}; skipping".format(INPUT_WIDTH,INPUT_HEIGHT))
        return None

    # If needed, resize now that the normalized box coordinates have been calculated
    if width > 1000 or height > 1000:
        basewidth = 1000
        wpercent = (basewidth / float(width))
        hsize = int((float(height) * float(wpercent)))
        activ_image = activ_image.resize((basewidth, hsize), Image.ANTIALIAS)
        width, height = activ_image.size

    imgByteArr = io.BytesIO()
    if example['extension'] in ['jpg', 'jpeg']:
        activ_image.save(imgByteArr, format='JPEG')
    else:
        activ_image.save(imgByteArr, format='PNG')
    encoded_image_data = imgByteArr.getvalue()  # Encoded image bytes

    filename = example['file_name'].encode(
        'utf-8')  # Filename of the image. Empty if image is not from file
    image_format = example['image_format']  # b'jpeg' or b'png'

    # List of string class name of bounding box (1 per box)
    classes_text = [example['label'] for i in range(len(xmins))]

    # List of integer class id of bounding box (1 per box)
    classes = [example['label_num'] for i in range(len(xmins))]

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
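# --- Usage sketch (illustrative, not part of the original source) ---
# Assumes the create_tf_example(example, mode) defined above, together with
# the module-level USE_GRAYSCALE / ONE_IMAGE_SIZE flags it reads; the
# example dicts are expected to carry the keys the function accesses.
def write_examples(examples, output_path, mode='train'):
    writer = tf.python_io.TFRecordWriter(output_path)
    written = 0
    for ex in examples:
        tf_example = create_tf_example(ex, mode)
        if tf_example is not None:  # None signals a skipped image
            writer.write(tf_example.SerializeToString())
            written += 1
    writer.close()
    return written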
def create_tf_record(data,
                     label_map_dict,
                     is_yaml=False,
                     ignore_difficult_instances=False):
    """
    Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
    :param data: dict holding (XML or YAML) fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict)
    :param label_map_dict: A map from string label names to integers ids.
    :param ignore_difficult_instances: Whether to skip difficult instances in the dataset  (default: False).

    Returns:
    :return tf_example: The converted tf.Example.

    Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """

    with tf.gfile.GFile(data['path'], 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    if is_yaml:
        width = int(data['width'])
        height = int(data['height'])
        filename = data['path'].encode('utf8')
        for box in data['boxes']:
            difficult_obj.append(0)

            xmin.append(float(box['x_min']) / width)
            ymin.append(float(box['y_min']) / height)
            xmax.append(float(box['x_max']) / width)
            ymax.append(float(box['y_max']) / height)
            classes_text.append(box['label'].encode('utf8'))
            classes.append(label_map_dict[box['label']])
            truncated.append(0)
            poses.append('Unspecified'.encode('utf8'))
    else:
        width = int(data['size']['width'])
        height = int(data['size']['height'])
        filename = data['filename'].encode('utf8')

        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return tf_example
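# --- Usage sketch (illustrative, not part of the original source) ---
# A minimal YAML-style record driving the is_yaml=True branch above; the
# keys mirror exactly what create_tf_record reads, while the image path and
# the label map are hypothetical.
sample = {
    'path': '/data/images/0001.jpg',
    'width': 1280,
    'height': 720,
    'boxes': [{'label': 'Green', 'x_min': 10, 'y_min': 20,
               'x_max': 60, 'y_max': 120}],
}
tf_example = create_tf_record(sample, {'Green': 1}, is_yaml=True)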
def main(_):
    xml_file = FLAGS.xml

    logging.info('Reading from DLIB %s dataset.', xml_file)

    # find all labels in xml
    xml_data = xml.etree.ElementTree.parse( xml_file ).getroot()
    labels = find_all_labels_recursive(xml_data)
    labels = sorted(labels)
    print(labels)

    # create pbtxt
    label_map_filename = os.path.splitext(FLAGS.output_path)[0] + '.pbtxt'
    with open(label_map_filename,'w') as f:
        for idx,lbl in enumerate(labels):
            f.write("item {{\n  id: {}\n  name: '{}'\n}}\n\n".format(idx+1,lbl))

    # create TF record file
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    xml_file_dir = os.path.dirname(xml_file)
    print('XML file directory is {}.'.format(xml_file_dir))
    for img_node in xml_data.find('images'):

        # get image path
        img_filename = img_node.attrib['file']
        if os.path.isabs( img_filename ):
            img_path = img_filename
        else:
            img_path = os.path.join(xml_file_dir,img_filename)

        # read image
        with tf.gfile.GFile(img_path, 'rb') as fid:
            encoded_jpg = fid.read()
        encoded_jpg_io = io.BytesIO(encoded_jpg)
        image = PIL.Image.open(encoded_jpg_io)
        if image.format != 'JPEG':
            raise ValueError('Image format not JPEG')
        key = hashlib.sha256(encoded_jpg).hexdigest()
        width,height = image.size

        print("Image {}: {}x{}".format(os.path.basename(img_filename),width,height))

        xmin = []
        ymin = []
        xmax = []
        ymax = []
        classes = []
        classes_text = []
        truncated = []
        difficult_obj = []

        # read annotations
        for annot_node in img_node:
            label_node = annot_node.find('label')
            if label_node is None:
                print("Skipping annotation because label is empty")
                continue
            label = label_node.text

            bbox = dict(ymin=int(annot_node.attrib['top']),
                        xmin=int(annot_node.attrib['left']),
                        xmax=int(annot_node.attrib['left'])+int(annot_node.attrib['width']),
                        ymax=int(annot_node.attrib['top'])+int(annot_node.attrib['height']))

            is_truncated = bbox['xmin'] < 0 or bbox['xmax'] >= width or bbox['ymin'] < 0 or bbox['ymax'] >= height
            is_difficult = False

            xmin.append(float(bbox['xmin']) / width)
            ymin.append(float(bbox['ymin']) / height)
            xmax.append(float(bbox['xmax']) / width)
            ymax.append(float(bbox['ymax']) / height)
            classes_text.append(label.encode('utf8'))
            classes.append(labels.index(label)+1)
            truncated.append(int(is_truncated))
            difficult_obj.append(int(is_difficult))

        if len(classes) > 0:
            example = tf.train.Example(features=tf.train.Features(feature={
                'image/height': dataset_util.int64_feature(height),
                'image/width': dataset_util.int64_feature(width),
                'image/filename': dataset_util.bytes_feature(img_filename.encode('utf8')),
                'image/source_id': dataset_util.bytes_feature(img_filename.encode('utf8')),
                'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
                'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
                'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
                'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
                'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label': dataset_util.int64_list_feature(classes),
                'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                'image/object/truncated': dataset_util.int64_list_feature(truncated),
                #'image/object/view': dataset_util.bytes_list_feature(poses),
            }))
            writer.write(example.SerializeToString())

    # print(data)
    #examples_list = dataset_util.read_examples_list(examples_path)
    # for idx, example in enumerate(examples_list):
    #   if idx % 100 == 0:
    #     logging.info('On image %d of %d', idx, len(examples_list))
    #   path = os.path.join(annotations_dir, example + '.xml')
    #   with tf.gfile.GFile(path, 'r') as fid:
    #     xml_str = fid.read()
    #   xml = etree.fromstring(xml_str)
    #   data = dataset_util.recursive_parse_xml_to_dict(xml)
    #   tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
    #                                   FLAGS.ignore_difficult_instances)
    #   writer.write(tf_example.SerializeToString())

    writer.close()
def dict_to_tf_example(data, label_map_dict):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """

  encoded_jpg_io = io.BytesIO()
  image = data['image']
  image.save(encoded_jpg_io, "JPEG", quality=80)
  encoded_jpg = encoded_jpg_io.getvalue()
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width, height = image.size

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  rotation = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  masks = []
  difficult_obj = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    difficult_obj.append(int(difficult))

    xmin.append(float(obj['bndbox']['xmin']) / width)
    ymin.append(float(obj['bndbox']['ymin']) / height)
    xmax.append(float(obj['bndbox']['xmax']) / width)
    ymax.append(float(obj['bndbox']['ymax']) / height)
    rotation.append(float(obj['rotation']))
    masks.append(obj['mask'])
    classes_text.append(obj['name'].encode('utf8'))
    classes.append(label_map_dict[obj['name']])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  # Stack the per-object masks and run-length encode the flattened result
  # with the (project-specific) pn_encode helper.
  mask = np.stack(masks)
  encoded_mask = pn_encode(mask.flatten()).tolist()
  mask_length = len(encoded_mask)
  print('mask encode:', mask.shape, '->', len(encoded_mask))
  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/rotation': dataset_util.float_list_feature(rotation),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
      'image/segmentation/object': dataset_util.int64_list_feature(encoded_mask),
      'image/segmentation/length': dataset_util.int64_feature(mask_length),
      'image/segmentation/object/class': dataset_util.int64_list_feature(classes),
  }))
  return example
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=True):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result. default: True.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
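# --- Usage sketch (illustrative, not part of the original source) ---
# Shows how the (key, example, num_annotations_skipped) return values are
# typically consumed for a COCO-format groundtruth file. Assumes the
# label_map_util import the docstring refers to; the file paths are
# hypothetical.
import json

def convert_coco(annotations_file, image_dir, output_path):
    with tf.gfile.GFile(annotations_file, 'r') as fid:
        groundtruth = json.load(fid)
    category_index = label_map_util.create_category_index(
        groundtruth['categories'])
    # Group annotations by the image they belong to.
    anns_by_image = {}
    for ann in groundtruth['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)
    writer = tf.python_io.TFRecordWriter(output_path)
    skipped = 0
    for image in groundtruth['images']:
        _, example, num_skipped = create_tf_example(
            image, anns_by_image.get(image['id'], []), image_dir,
            category_index)
        writer.write(example.SerializeToString())
        skipped += num_skipped
    writer.close()
    print('Skipped {} invalid annotations.'.format(skipped))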
def dict_to_tf_example(data,
                       dataset_directory,
                       set_name,
                       label_map_dict,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      dataset_directory: Path to root directory holding PASCAL dataset
      set_name: name of the set training, validation or test
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    img_path = os.path.join(set_name, data['filename'])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'object' not in data:
        data['object'] = []
    for obj in data['object']:
        if obj['name'] in label_map_dict:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
                data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
                data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
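# --- Usage sketch (illustrative, not part of the original source) ---
# How the `data` dict is typically produced from a PASCAL VOC XML file via
# the recursive_parse_xml_to_dict call the docstring describes; the paths,
# set name, and label_map_dict are hypothetical.
from lxml import etree

with tf.gfile.GFile('/data/VOC/annotations/000001.xml', 'r') as fid:
    xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
example = dict_to_tf_example(data, '/data/VOC', 'train', label_map_dict)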
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding COWFACE XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      dataset_directory: Path to root directory holding PASCAL dataset
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG

    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict, FLAGS.ignore_difficult_instances)

    data:
    {'source':
         {'database': 'Unknown'},
          'object': [
                {
               'bndbox': {'xmin': '115', 'ymin': '5', 'ymax': '124', 'xmax': '261'}, 'difficult': '0', 'pose': 'Unspecified', 'name': 'dog', 'truncated': '0'
                }
          ],
         'filename': '1.jpg', 'segmented': '0', 'path': '/home/liuhy/Downloads/dog/1.jpg', 'folder': 'dog',
         'size': {'width': '323', 'depth': '3', 'height': '156'}
    }
    """
    img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])   # dog/JPEGImages/1.jpg
    full_path = os.path.join(dataset_directory, img_path)   # FLAGS.data_dir/dog/JPEGImages/1.jpg
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
def create_tf_example(example: list):
    """
    Creates a tf.train.Example object from an image and its labels which can be used in
    the training pipeline for the object detector.

    Args:
        example: list containing information about the image and its labels.

    Returns: information of example parsed into a tf.train.Example object
    """
    width = int(example[0])
    height = int(example[1])
    filename = str.encode(example[2])

    with tf.gfile.GFile(example[3], 'rb') as f:
        encoded_image_data = bytes(f.read())
    image_format = b'jpg'

    boxes = example[5]
    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []

    for b in boxes:
        xmins.append(b[0])
        ymins.append(b[1])
        xmaxs.append(b[2])
        ymaxs.append(b[3])

    xmins = [x / width for x in xmins]
    xmaxs = [x / width for x in xmaxs]
    ymins = [y / height for y in ymins]
    ymaxs = [y / height for y in ymaxs]

    class_reader = ClassReader(known_class_ids_annotation_predictor)

    classes_text = example[4][:]
    classes = []

    # Drop boxes whose class is None; pop in reverse index order so the
    # remaining indices stay valid while the lists shrink.
    none_vals = [i for i, cls in enumerate(classes_text) if cls is None]
    for index in sorted(none_vals, reverse=True):
        classes_text.pop(index)
        xmins.pop(index)
        ymins.pop(index)
        xmaxs.pop(index)
        ymaxs.pop(index)

    for i, cls in enumerate(classes_text):
        classes.append(class_reader.get_index_of_class_from_label(cls))
        class_encoded = str.encode(cls)
        classes_text[i] = class_encoded

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_mask_png)
  mask = PIL.Image.open(encoded_png_io)
  if mask.format != 'PNG':
    raise ValueError('Mask format not PNG')

  mask_np = np.asarray(mask)
  nonbackground_indices_x = np.any(mask_np != 2, axis=0)
  nonbackground_indices_y = np.any(mask_np != 2, axis=1)
  nonzero_x_indices = np.where(nonbackground_indices_x)
  nonzero_y_indices = np.where(nonbackground_indices_y)

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  masks = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue
    difficult_obj.append(int(difficult))

    if faces_only:
      xmin = float(obj['bndbox']['xmin'])
      xmax = float(obj['bndbox']['xmax'])
      ymin = float(obj['bndbox']['ymin'])
      ymax = float(obj['bndbox']['ymax'])
    else:
      xmin = float(np.min(nonzero_x_indices))
      xmax = float(np.max(nonzero_x_indices))
      ymin = float(np.min(nonzero_y_indices))
      ymax = float(np.max(nonzero_y_indices))

    xmins.append(xmin / width)
    ymins.append(ymin / height)
    xmaxs.append(xmax / width)
    ymaxs.append(ymax / height)
    class_name = get_class_name_from_filename(data['filename'])
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))
    if not faces_only:
      mask_remapped = (mask_np != 2).astype(np.uint8)
      masks.append(mask_remapped)

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }
  if not faces_only:
    if mask_type == 'numerical':
      mask_stack = np.stack(masks).astype(np.float32)
      masks_flattened = np.reshape(mask_stack, [-1])
      feature_dict['image/object/mask'] = (
          dataset_util.float_list_feature(masks_flattened.tolist()))
    elif mask_type == 'png':
      encoded_mask_png_list = []
      for mask in masks:
        img = PIL.Image.fromarray(mask)
        output = io.BytesIO()
        img.save(output, format='PNG')
        encoded_mask_png_list.append(output.getvalue())
      feature_dict['image/object/mask'] = (
          dataset_util.bytes_list_feature(encoded_mask_png_list))

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
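# --- Illustrative check (not part of the original source) ---
# Round-trips one PNG-encoded instance mask back into a numpy array, which
# is handy for sanity-checking the 'image/object/mask' bytes written above
# when mask_type='png'.
def decode_png_mask(png_bytes):
    return np.asarray(PIL.Image.open(io.BytesIO(png_bytes)))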
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []

    for obj in data['object']:

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        #class_name = get_class_name_from_filename(data['filename'])
        class_name = 'gazelle_head'
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return example
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
  full_path = os.path.join(dataset_directory, img_path)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue

    difficult_obj.append(int(difficult))

    xmin.append(float(obj['bndbox']['xmin']) / width)
    ymin.append(float(obj['bndbox']['ymin']) / height)
    xmax.append(float(obj['bndbox']['xmax']) / width)
    ymax.append(float(obj['bndbox']['ymax']) / height)
    classes_text.append(obj['name'].encode('utf8'))
    classes.append(label_map_dict[obj['name']])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return example
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       example,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """

    # for embedded folders like images/bw1/image1.jpg
    # path = data['path'].split(os.sep)[-2]
    # print(path, data['filename'])
    # if len(path) < 5 and 'bw' not in path:
    #   path = 'bw' + path
    # img_path = os.path.join(image_subdirectory, path, data['filename'])
    # print(img_path)
    print(example)
    img_name = example.split(".")[0]
    img_path = os.path.join(image_subdirectory, img_name + '.jpg')

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        # class_name = get_class_name_from_filename(data['filename'])
        class_name = obj['name'].split(' ')[0]
        # class_name = 'pack'
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def create_tf_example(group, path, is_pevid: bool = False):
    """Creates a TF example given a dataframe containing annotation information and input image path.
     This is only used for TFExample creation from the PEViD-UHD dataset"""

    if is_pevid:
        # Get and add the volume and 'frames' to the path
        volume_name = group.filename.split('_frame')[0]
        path = os.path.join(path, volume_name, 'frames')

    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
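# --- Companion helper (illustrative, not part of the original source) ---
# create_tf_example(group, path) above reads group.filename and iterates
# group.object, which matches the pandas groupby "split" pattern these
# conversion scripts are usually paired with; the CSV column names are
# assumptions based on the fields the function accesses.
from collections import namedtuple

def split(df, group_key='filename'):
    Data = namedtuple('Data', ['filename', 'object'])
    grouped = df.groupby(group_key)
    return [Data(name, grouped.get_group(name)) for name in grouped.groups]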
def create_tf_examples(writer, anns, path, file_name, width, height,
                       encoded_jpg):
    xmins, ymins = [], []
    xmaxs, ymaxs = [], []
    classes_text = []
    classes = []
    num_examples = 0
    for ann in anns:
        xmin = ann['bbox'][0]
        ymin = ann['bbox'][1]
        w = ann['bbox'][2]
        h = ann['bbox'][3]
        xmax = xmin + w
        ymax = ymin + h

        # normalize
        xmin /= width
        xmax /= width
        ymin /= height
        ymax /= height

        if xmin < 1 and xmax < 1 and ymin < 1 and ymax < 1:
            xmins.append(xmin)
            xmaxs.append(xmax)
            ymins.append(ymin)
            ymaxs.append(ymax)
            classes_text.append('Text'.encode('utf8'))
            classes.append(1)

    filename = os.path.join(path, file_name)
    filename = filename.encode('utf8')
    image_format = b'jpg'

    if len(xmins) != 0:
        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(filename),
                'image/source_id':
                dataset_util.bytes_feature(filename),
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpg),
                'image/format':
                dataset_util.bytes_feature(image_format),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            }))
        writer.write(tf_example.SerializeToString())
        num_examples += 1
    return num_examples
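# --- Usage sketch (illustrative, not part of the original source) ---
# create_tf_examples returns 1 when a valid example was written and 0 when
# every box failed the normalization check, so the return values can be
# summed; annotations_by_file here is a hypothetical mapping of file names
# to their annotation lists.
def write_all(writer, annotations_by_file, path):
    total = 0
    for file_name, anns in annotations_by_file.items():
        with tf.gfile.GFile(os.path.join(path, file_name), 'rb') as fid:
            encoded_jpg = fid.read()
        image = Image.open(io.BytesIO(encoded_jpg))
        width, height = image.size
        total += create_tf_examples(writer, anns, path, file_name,
                                    width, height, encoded_jpg)
    return total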
def create_tf_example(_filename, _encoded_image_data, _width, _height,
                      _metadata):
    height = _height  # Image height
    width = _width  # Image width
    #  if height != 300 or width != 300:
    #    print "Invalid Image dimensions! " + str(width) + "x" + str(height)
    #    exit()

    filename = _filename  # Filename of the image. Empty if image is not from file
    encoded_image_data = _encoded_image_data  # Encoded image bytes
    image_format = b'jpeg'  # or b'png'
    metadata = _metadata

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)

    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    number_entries = int(next(metadata))
    for i in range(0, number_entries):
        next_line = next(metadata).strip('\n')

        data = [float(entry) for entry in next_line.split(' ')[:-2]]
        centre_x = data[3]
        centre_y = data[4]

        r_major = data[0] * math.cos(data[2] * math.pi / 180)
        r_minor = data[1] * math.cos(data[2] * math.pi / 180)

        x = float(centre_x - r_minor)
        y = float(centre_y - r_major)
        w = float(r_minor * 2)
        h = float(r_major * 2)

        if (math.isnan(x) or math.isnan(y) or math.isnan(w) or math.isnan(h)
                or height == 0 or width == 0):
            print("NAN!")
            exit()

        xmins.append(x / float(width))
        xmaxs.append((x + w) / float(width))

        ymins.append(y / float(height))
        ymaxs.append((y + h) / float(height))

        classes_text.append('face'.encode('utf8'))  # bytes_list_feature needs bytes
        classes.append(1)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
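# --- Hedged usage sketch (assumption, not from the source): drive the
# ellipse-based converter above with an FDDB-style annotation file, where each
# image-path line is followed by a count line and one ellipse per line.
def convert_fddb_annotations(annotation_path, image_root, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    with open(annotation_path) as metadata:
        for line in metadata:
            image_path = os.path.join(image_root, line.strip() + '.jpg')
            with tf.gfile.GFile(image_path, 'rb') as fid:
                encoded = fid.read()
            width, height = PIL.Image.open(io.BytesIO(encoded)).size
            example = create_tf_example(image_path.encode('utf8'), encoded,
                                        width, height, metadata)
            writer.write(example.SerializeToString())
    writer.close()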
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_mask_png)
    mask = PIL.Image.open(encoded_png_io)
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')
    mask_np = np.asarray(mask)

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    masks = []
    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue
            difficult_obj.append(int(difficult))

            xmin = float(obj['bndbox']['xmin'])
            xmax = float(obj['bndbox']['xmax'])
            ymin = float(obj['bndbox']['ymin'])
            ymax = float(obj['bndbox']['ymax'])

            xmins.append(xmin / width)
            ymins.append(ymin / height)
            xmaxs.append(xmax / width)
            ymaxs.append(ymax / height)

            # The class comes from the filename (pet-breed style naming)
            # rather than from the XML <name> tag.
            class_name = get_class_name_from_filename(data['filename'])
            classes_text.append(class_name.encode('utf8'))
            classes.append(label_map_dict[class_name])

            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

            mask_remapped = (mask_np != 2).astype(np.uint8)
            masks.append(mask_remapped)

    feature_dict = {
        'image/height':
        dataset_util.int64_feature(height),
        'image/width':
        dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated),
        'image/object/view':
        dataset_util.bytes_list_feature(poses),
    }

    if mask_type == 'numerical':
        mask_stack = np.stack(masks).astype(np.float32)
        masks_flattened = np.reshape(mask_stack, [-1])
        feature_dict['image/object/mask'] = (dataset_util.float_list_feature(
            masks_flattened.tolist()))
    elif mask_type == 'png':
        encoded_mask_png_list = []
        for mask in masks:
            img = PIL.Image.fromarray(mask)
            output = io.BytesIO()
            img.save(output, format='PNG')
            encoded_mask_png_list.append(output.getvalue())
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
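# --- Hedged usage sketch (assumption): wire the mask-aware converter above to
# the Oxford-IIIT Pet layout used by the Object Detection API tutorials, where
# trimap PNGs sit next to the XMLs. Paths and file names here are assumptions.
from lxml import etree  # assumed available, as in the upstream pet-record script

def convert_pet_annotation(data_dir, xml_name, label_map_dict, writer):
    xml_path = os.path.join(data_dir, 'annotations', 'xmls', xml_name)
    with tf.gfile.GFile(xml_path, 'r') as fid:
        xml = etree.fromstring(fid.read())
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    mask_path = os.path.join(data_dir, 'annotations', 'trimaps',
                             data['filename'].replace('.jpg', '.png'))
    example = dict_to_tf_example(data, mask_path, label_map_dict,
                                 os.path.join(data_dir, 'images'))
    writer.write(example.SerializeToString())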
def make_shape(max_num, width, height, bg_color=True, filename=None, label_map_dict=None):
    if bg_color:
        # Random background per channel; OpenCV arrays are (rows, cols),
        # i.e. (height, width).
        r_s = np.random.randint(20, 160)
        r = np.random.randint(r_s, r_s + random.randint(10, 90), width * height, np.uint8).reshape((height, width))
        g_s = np.random.randint(20, 160)
        g = np.random.randint(g_s, g_s + random.randint(10, 90), width * height, np.uint8).reshape((height, width))
        b_s = np.random.randint(20, 160)
        b = np.random.randint(b_s, b_s + random.randint(10, 90), width * height, np.uint8).reshape((height, width))
    else:
        r = np.zeros((height, width), np.uint8)
        g = np.zeros((height, width), np.uint8)
        b = np.zeros((height, width), np.uint8)

    bg = cv2.merge([r, g, b])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    for i in range(max_num):
        label = random.randint(1, 2)
        x1, y1, x2, y2 = 0, 0, 0, 0
        if label == 1:
            shp = getCircleShape(bg)
            x1, y1, x2, y2 = shp[0] - shp[2], shp[1] - shp[2], shp[0] + shp[2], shp[1] + shp[2]
            cv2.circle(bg, (shp[0], shp[1]), shp[2], getColor(), -1)
            label_text = 'circle'
        elif label == 2:
            shp = getRectShape(bg)
            x1, y1, x2, y2 = shp[0], shp[1], shp[2], shp[3]
            cv2.rectangle(bg, (shp[0], shp[1]), (shp[2], shp[3]), getColor(), -1)
            label_text = 'rect'

        # Divide by a float so Python 2 integer division cannot truncate.
        xmins.append(x1 / float(width))
        ymins.append(y1 / float(height))
        xmaxs.append(x2 / float(width))
        ymaxs.append(y2 / float(height))

        classes.append(label_map_dict[label_text])
        classes_text.append(label_text.encode('utf8'))
        truncated.append(0)
        poses.append("0".encode('utf8'))
        difficult_obj.append(0)

    filename = filename + '_temp.jpg'
    img_path = os.path.join('./data', filename)

    cv2.imwrite(img_path, bg)

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses)

    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    os.remove(img_path)
    return example
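# --- Hedged usage sketch (assumption): generate a small synthetic dataset
# with make_shape above; the label map {'circle': 1, 'rect': 2} mirrors the
# two branches in its loop, and ./data must exist for the temp JPEG.
def generate_synthetic_record(output_path, num_images=100):
    writer = tf.python_io.TFRecordWriter(output_path)
    label_map_dict = {'circle': 1, 'rect': 2}
    for i in range(num_images):
        example = make_shape(max_num=3, width=300, height=300,
                             filename='synthetic_%05d' % i,
                             label_map_dict=label_map_dict)
        writer.write(example.SerializeToString())
    writer.close()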
def dict_to_tf_example(data,label_map_dict,image_dir):
  # TODO(user): Populate the following variables from your example.
  height = int(data['size']['height']) # Image height
  width = int(data['size']['width']) # Image width
  s = data['filename']
  if s.endswith('png'):
      s = s[:-3] + 'jpg'
  filename = os.path.join(image_dir, s)  # Filename of the image
  with tf.gfile.GFile(filename, 'rb') as fid:
      encoded_image_data = fid.read()
  encoded_image_data_io = io.BytesIO(encoded_image_data)
  image = PIL.Image.open(encoded_image_data_io)

  if image.format != 'JPEG':
      raise ValueError('Image format not JPEG')
#  key = hashlib.sha256(encoded_image_data).hexdigest()
  
  image_format = b'jpeg' # b'jpeg' or b'png'

  xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
  xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box)
  ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
  ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box)
  classes_text = [] # List of string class name of bounding box (1 per box)
  classes = [] # List of integer class id of bounding box (1 per box)
  
  if 'object' in data:
      for obj in data['object']:
        if obj:
          xmin = float(obj['bndbox']['xmin'])
          xmax = float(obj['bndbox']['xmax'])
          ymin = float(obj['bndbox']['ymin'])
          ymax = float(obj['bndbox']['ymax'])
          xmins.append(xmin / width)
          ymins.append(ymin / height)
          xmaxs.append(xmax / width)
          ymaxs.append(ymax / height)
          class_name = obj['name']
          classes_text.append(class_name.encode('utf8'))
          classes.append(label_map_dict[class_name])

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  return tf_example
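# --- Hedged pre-processing sketch (assumption): the converter above swaps a
# .png suffix for .jpg but still requires an actual JPEG on disk, so any PNGs
# should be transcoded up front. os and PIL are assumed from this script.
def ensure_jpegs(image_dir):
    for name in os.listdir(image_dir):
        if name.endswith('.png'):
            png = PIL.Image.open(os.path.join(image_dir, name))
            png.convert('RGB').save(
                os.path.join(image_dir, name[:-3] + 'jpg'), 'JPEG')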
def json_to_tf_example(json_data,
                       dataset_directory,
                       label_map_dict ):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      dataset_directory: Path to root directory holding PASCAL dataset
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    filename = json_data.get("filename")
    full_path = os.path.join(FLAGS.data_dir, "photos", filename)

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    # Use the decoded image size; the JSON "image_width"/"image_height"
    # fields may be stale.
    width, height = image.size

    # (A disabled variant rescaled the photo to 756x1008 with ImageMagick
    # before encoding.)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    for obj in json_data.get("bndboxes"):
        difficult_obj.append(0)
        xmin.append(float(obj.get("x")) / width)
        ymin.append(float(obj.get("y")) / height)
        xmax.append(numpy.clip(float(obj.get("x")+obj.get("w")) / width,0,1))
        ymax.append(numpy.clip(float(obj.get("y")+obj.get("h")) / height,0,1))
        classes_text.append(obj.get("id").encode('utf8'))
        classes.append(label_map_dict[obj.get("id")])
        truncated.append(int(0))
        poses.append("Unspecified")

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
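# --- Hedged usage sketch (assumption): feed json_to_tf_example from a
# directory of per-image .json files sitting next to FLAGS.data_dir/photos.
import glob
import json

def convert_json_dir(json_dir, label_map_dict, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    for json_path in glob.glob(os.path.join(json_dir, '*.json')):
        with open(json_path) as f:
            json_data = json.load(f)
        example = json_to_tf_example(json_data, json_dir, label_map_dict)
        writer.write(example.SerializeToString())
    writer.close()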
def dict_to_tf_example(example):

    filename = example['filename']
    filename = filename.encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    width, height = image.size

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmins = []  # normalized left x-coordinate
    ymins = []  # normalized top y-coordinate
    xmaxs = []  # normalized right x-coordinate
    ymaxs = []  # normalized bottom y-coordinate
    classes = []  # class id
    classes_text = []  # class name

    for box in example['annotations']:
        # Divide by a float so Python 2 integer division cannot truncate.
        xmins.append(box['xmin'] / float(width))
        xmaxs.append((box['xmin'] + box['x_width']) / float(width))
        ymins.append(box['ymin'] / float(height))
        ymaxs.append((box['ymin'] + box['y_height']) / float(height))
        classes_text.append(box['class'].encode())
        classes.append(int(LABELS_MAP[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes)
        }))

    return tf_example
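# --- Hedged assumption: LABELS_MAP above is a module-level dict from class
# name to integer id; a traffic-light style mapping might look like this.
LABELS_MAP = {'Green': 1, 'Red': 2, 'Yellow': 3, 'off': 4}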
def prepare_example(image_path, annotations, label_map_dict):
  """Converts a dictionary with annotations for an image to tf.Example proto.

  Args:
    image_path: The complete path to image.
    annotations: A dictionary of per-object annotation lists for the objects
      that appear in the image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
  with tf.gfile.GFile(image_path, 'rb') as fid:
    encoded_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_png)
  image = pil.open(encoded_png_io)
  image = np.asarray(image)

  key = hashlib.sha256(encoded_png).hexdigest()

  width = int(image.shape[1])
  height = int(image.shape[0])

  xmin_norm = annotations['2d_bbox_left'] / float(width)
  ymin_norm = annotations['2d_bbox_top'] / float(height)
  xmax_norm = annotations['2d_bbox_right'] / float(width)
  ymax_norm = annotations['2d_bbox_bottom'] / float(height)

  difficult_obj = [0]*len(xmin_norm)

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_png),
      'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
      'image/object/class/text': dataset_util.bytes_list_feature(
          [x.encode('utf8') for x in annotations['type']]),
      'image/object/class/label': dataset_util.int64_list_feature(
          [label_map_dict[x] for x in annotations['type']]),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.float_list_feature(
          annotations['truncated']),
      'image/object/alpha': dataset_util.float_list_feature(
          annotations['alpha']),
      'image/object/3d_bbox/height': dataset_util.float_list_feature(
          annotations['3d_bbox_height']),
      'image/object/3d_bbox/width': dataset_util.float_list_feature(
          annotations['3d_bbox_width']),
      'image/object/3d_bbox/length': dataset_util.float_list_feature(
          annotations['3d_bbox_length']),
      'image/object/3d_bbox/x': dataset_util.float_list_feature(
          annotations['3d_bbox_x']),
      'image/object/3d_bbox/y': dataset_util.float_list_feature(
          annotations['3d_bbox_y']),
      'image/object/3d_bbox/z': dataset_util.float_list_feature(
          annotations['3d_bbox_z']),
      'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
          annotations['3d_bbox_rot_y']),
  }))

  return example
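# --- Hedged sanity check (assumption, not from the source): parse the first
# record back and print a few stored features to confirm they round-trip.
def inspect_first_record(tfrecord_path):
    for record in tf.python_io.tf_record_iterator(tfrecord_path):
        example = tf.train.Example()
        example.ParseFromString(record)
        feature = example.features.feature
        print('height:', feature['image/height'].int64_list.value[0])
        print('width:', feature['image/width'].int64_list.value[0])
        print('boxes:', len(feature['image/object/bbox/xmin'].float_list.value))
        break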
def generate_tfrecord(group, path, label_dict):
    # Open image file
    with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                           'rb') as gf:
        encoded_jpg = gf.read()

    jpg_io = io.BytesIO(encoded_jpg)
    img = Image.open(jpg_io)

    image_width, image_height = img.size
    filename = group.filename.encode('utf8')
    img_format = b'jpg'

    # For each image we store the minimum and maximum x and y
    # positions of its bounding boxes
    bx_min = []
    bx_max = []
    by_min = []
    by_max = []

    classes = []
    classes_texts = []

    # Iterate over every row of annotation data for this image
    for i, row in group.object.iterrows():
        bx_min.append(row['xmin'] / image_width)
        bx_max.append(row['xmax'] / image_width)
        by_min.append(row['ymin'] / image_height)
        by_max.append(row['ymax'] / image_height)
        # Add the class text and the integer label converted from it
        classes_texts.append(row['class'].encode('utf8'))
        classes.append(label_to_int(row['class'], label_dict))

    # TF record example
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(image_height),
            'image/width':
            dataset_util.int64_feature(image_width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(img_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(bx_min),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(bx_max),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(by_min),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(by_max),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_texts),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes)
        }))
    return tf_example
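# --- Hedged usage sketch (assumption): `group` above follows the common
# csv-to-tfrecord pattern, a namedtuple of (filename, object) where `object`
# is the per-image slice of a pandas DataFrame.
from collections import namedtuple

def split(df, group_key):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group_key)
    return [data(filename, gb.get_group(x))
            for filename, x in zip(gb.groups.keys(), gb.groups)]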
def create_tf_record(output_filename, num_shards, examples):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack,
            output_filename,
            num_shards)
        for idx, example in enumerate(examples):
            img_path = os.path.join(read_bucket, example)
            if not os.path.isfile(img_path):
                continue
            with tf.gfile.GFile(img_path, 'rb') as fid:
                encoded_jpg = fid.read()
            encoded_jpg_io = io.BytesIO(encoded_jpg)
            image = PIL.Image.open(encoded_jpg_io)
            if image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            key = hashlib.sha256(encoded_jpg).hexdigest()

            width, height = image.size

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []  # 'coke', 'pepsi', 'coke'...
            classes = []  # 1, 2, 1...
            difficult_obj = []
            truncated = []
            poses = []

            for annotation in annotations[example]:
                xmins.append(annotation['x'])
                xmaxs.append(annotation['x2'])
                ymins.append(annotation['y'])
                ymaxs.append(annotation['y2'])
                classes_text.append(annotation['label'].encode('utf8'))
                classes.append(1)  # temporary, I need to assign labels to actual ids
                difficult_obj.append(0)
                truncated.append(0)
                poses.append(''.encode('utf8'))

            try:
                feature_dict = {
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/source_id': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label': dataset_util.int64_list_feature(classes),
                    'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                    'image/object/truncated': dataset_util.int64_list_feature(truncated),
                    'image/object/view': dataset_util.bytes_list_feature(poses)
                }
                tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                print('Invalid example, ignoring.')
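# --- Hedged usage sketch (assumption): call the sharded writer above after a
# train/validation split; `annotations` and `read_bucket` are globals in the
# surrounding script.
import random

def write_sharded_records():
    examples = sorted(annotations.keys())
    random.shuffle(examples)
    split_idx = int(0.8 * len(examples))
    create_tf_record('train.record', 10, examples[:split_idx])
    create_tf_record('val.record', 10, examples[split_idx:])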
def create_tf_example(example):

    # Bosch
    #height = 720 # Image height
    #width = 1280 # Image width

    # Udacity data set
    height = 600  # Image height
    width = 800  # Image width

    filename = example['filename']  # Filename of the image. Empty if image is not from file
    filename = filename.encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode()

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    for box in example['annotations']:
        # if box['occluded'] is False:
        # Divide by a float so Python 2 integer division cannot truncate.
        xmins.append(box['xmin'] / float(width))
        xmaxs.append((box['xmin'] + box['x_width']) / float(width))
        ymins.append(box['ymin'] / float(height))
        ymaxs.append((box['ymin'] + box['y_height']) / float(height))
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
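# --- Hedged variant (assumption): derive the true size from the encoded bytes
# instead of hardcoding 800x600, so one converter covers both the Bosch and
# Udacity sets mentioned above. PIL and io are assumed from this script.
def image_size_from_bytes(encoded_image):
    image = PIL.Image.open(io.BytesIO(encoded_image))
    return image.size  # (width, height)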
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: data corresponding to each image file.
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying subdirectory within the
       dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid PNG
  """
    img_path = image_subdirectory
    # Read as binary; without 'rb' the bytes would be decoded as text.
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'PNG':
        raise ValueError('Image format error')
        # (A disabled fallback pasted the image onto a white RGB background
        # and re-saved it as JPEG.)

    key = hashlib.sha256(encoded_jpg).hexdigest()

    width, height = image.size
    img_filename = img_path.split('/')[-1]
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    occlud = []

    # Divide by a float so Python 2 integer division cannot truncate.
    xmin.append(int(data[2]) / float(width))
    ymin.append(int(data[3]) / float(height))
    xmax.append(int(data[4]) / float(width))
    ymax.append(int(data[5]) / float(height))
    class_name = data[1]
    classes_text.append(class_name.encode('utf8'))  # bytes_list_feature needs bytes
    classes.append(label_map_dict[class_name])

    trun, occ = data[6].split(',')
    truncated.append(int(trun))
    occlud.append(int(occ))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(img_filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(img_filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.int64_list_feature(occlud),
        }))
    return example
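# --- Hedged usage sketch (assumption): `data` above looks like one parsed
# annotation row (index/filename, class, xmin, ymin, xmax, ymax,
# "truncated,occluded"); a driver over a text file might look like this.
def convert_annotation_file(txt_path, label_map_dict, image_dir, writer):
    with open(txt_path) as f:
        for line in f:
            data = line.strip().split()
            img_path = os.path.join(image_dir, data[0] + '.png')  # hypothetical naming
            example = dict_to_tf_example(data, label_map_dict, img_path)
            writer.write(example.SerializeToString())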
def dict_to_tf_example(data, label_map_dict, img_path):

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = image.width
    height = image.height

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []

    for shape in data['Layers']['Layer']['Shapes']['Shape']:

        text = shape['BlockText']['Text'].text
        if not (text.startswith('Panel') or text.startswith('panel')):
            continue

        attrib = shape['Data']['Extent'].attrib
        x = float(attrib['X'])
        y = float(attrib['Y'])
        w = float(attrib['Width'])
        h = float(attrib['Height'])

        xmin = x
        xmax = x + w
        ymin = y
        ymax = y + h

        xmin /= width
        ymin /= height
        xmax /= width
        ymax /= height

        if xmin < 0 or ymin < 0 or xmax > 1.01 or ymax > 1.01:
            print(img_path)

        xmins.append(xmin)
        ymins.append(ymin)
        xmaxs.append(xmax)
        ymaxs.append(ymax)

        class_name = 'Panel'
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
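# --- Hedged helper (assumption): rather than only printing the path when a
# panel box exceeds the 1.01 tolerance above, the coordinates could be clipped
# into [0, 1] before appending.
def clip_box(xmin, ymin, xmax, ymax):
    return (max(xmin, 0.0), max(ymin, 0.0), min(xmax, 1.0), min(ymax, 1.0))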