Beispiel #1
0
def dict_to_tf_example(img_path, labels, sp):
    """Build a tf.Example holding a JPEG image and its normalized boxes.

    The image file is read and stored in its encoded form. Each label string
    is split on ``sp``; its first four fields are parsed as integer pixel
    coordinates (xmin, ymin, xmax, ymax) and divided by the image width
    (x values) or height (y values). Every box gets class id 1.

    Args:
      img_path: Path of the JPEG file to read.
      labels: Iterable of label strings, one per bounding box.
      sp: Separator used to split each label string into fields.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the file is not a JPEG, or a label has fewer than four
        fields or a coordinate that is not an integer.
    """
    with tf.gfile.GFile(img_path, 'rb') as jpg_file:
        encoded_jpg = jpg_file.read()

    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if image.mode != 'RGB':
        image = image.convert('RGB')

    width, height = image.size

    # One (xmin, ymin, xmax, ymax) tuple per label, already normalized.
    boxes = []
    for label in labels:
        left, top, right, bottom = label.split(sp)[:4]
        boxes.append((int(left) / width, int(top) / height,
                      int(right) / width, int(bottom) / height))

    xmin = [box[0] for box in boxes]
    ymin = [box[1] for box in boxes]
    xmax = [box[2] for box in boxes]
    ymax = [box[3] for box in boxes]
    # Single-class dataset: every box is labelled 1.
    classes = [1] * len(boxes)

    feature = {
        'image/encoded': tfrecord_utils.bytes_feature(encoded_jpg),
        'image/format': tfrecord_utils.bytes_feature('jpg'.encode('utf8')),
        'image/object/bbox/xmin': tfrecord_utils.float_list_feature(xmin),
        'image/object/bbox/xmax': tfrecord_utils.float_list_feature(xmax),
        'image/object/bbox/ymin': tfrecord_utils.float_list_feature(ymin),
        'image/object/bbox/ymax': tfrecord_utils.float_list_feature(ymax),
        'image/object/class/label': tfrecord_utils.int64_list_feature(
            classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Beispiel #2
0
def _create_tf_example(entry):
    """ Creates a tf.train.Example to be saved in the TFRecord file.

        The image is decoded into an array and stored as raw bytes (not as
        an encoded JPEG), together with the array's height and width and
        the label.

        Args:
            entry: string containing the path to an image and its label.
        Return:
            tf_example: tf.train.Example with the features 'image',
                'image/height', 'image/width' and 'label'.
    """
    image_path, label = _get_image_and_label_from_entry(entry)

    # Decode the image into an array. The context manager closes the file
    # handle that Image.open otherwise keeps open.
    with Image.open(image_path) as image:
        image_np = np.array(image)

    # tobytes() replaces the deprecated tostring() alias; same output bytes.
    image_raw = image_np.tobytes()

    # Data which is going to be stored in the TFRecord file
    feature = {
        'image': tfrecord_utils.bytes_feature(image_raw),
        'image/height': tfrecord_utils.int64_feature(image_np.shape[0]),
        'image/width': tfrecord_utils.int64_feature(image_np.shape[1]),
        'label': tfrecord_utils.int64_feature(label),
    }

    tf_example = tf.train.Example(features=tf.train.Features(feature=feature))

    return tf_example
Beispiel #3
0
def dict_to_tf_example(img_path, labels):
  """Build a tf.Example holding a JPEG image and its normalized quadrilaterals.

  The image file is read and stored in its encoded form. The four corner
  coordinates of every quadrilateral are divided by the image width (x) or
  height (y). Every quadrilateral gets class id 1.

  Args:
    img_path: Path of the JPEG file to read.
    labels: Array exposing ``ndim`` and ``shape``. When 3-dimensional it is
      indexed as labels[axis][corner][box]; otherwise as labels[axis][corner]
      and treated as a single quadrilateral. Axis 0 holds x values, axis 1
      holds y values.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the file at img_path is not a JPEG.
  """
  with tf.gfile.GFile(img_path, 'rb') as jpg_file:
    encoded_jpg = jpg_file.read()

  image = PIL.Image.open(io.BytesIO(encoded_jpg))
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  if image.mode != 'RGB':
    image = image.convert('RGB')

  width, height = image.size

  # Gather every quadrilateral as a pair (four x values, four y values) so
  # that both label layouts share the normalization code below.
  if labels.ndim == 3:
    quads = [([labels[0][k][i] for k in range(4)],
              [labels[1][k][i] for k in range(4)])
             for i in range(labels.shape[2])]
  else:
    quads = [([labels[0][k] for k in range(4)],
              [labels[1][k] for k in range(4)])]

  # xs[k] / ys[k] hold the normalized k-th corner of every quadrilateral.
  xs = [[], [], [], []]
  ys = [[], [], [], []]
  for quad_x, quad_y in quads:
    for k in range(4):
      ys[k].append(quad_y[k] / height)
      xs[k].append(quad_x[k] / width)

  # Single-class dataset: every quadrilateral is labelled 1.
  classes = [1] * len(quads)

  feature = {
      'image/encoded': tfrecord_utils.bytes_feature(encoded_jpg),
      'image/format': tfrecord_utils.bytes_feature('jpg'.encode('utf8')),
      'image/object/bbox/y0': tfrecord_utils.float_list_feature(ys[0]),
      'image/object/bbox/x0': tfrecord_utils.float_list_feature(xs[0]),
      'image/object/bbox/y1': tfrecord_utils.float_list_feature(ys[1]),
      'image/object/bbox/x1': tfrecord_utils.float_list_feature(xs[1]),
      'image/object/bbox/y2': tfrecord_utils.float_list_feature(ys[2]),
      'image/object/bbox/x2': tfrecord_utils.float_list_feature(xs[2]),
      'image/object/bbox/y3': tfrecord_utils.float_list_feature(ys[3]),
      'image/object/bbox/x3': tfrecord_utils.float_list_feature(xs[3]),
      'image/object/class/label': tfrecord_utils.int64_list_feature(classes),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data, using the width and height recorded in
    ``data['size']``. The stored class label is the label-map id minus one.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). An annotation
        without an 'object' entry yields an example with empty box lists.
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: String specifying the directory holding the actual
        image data. The extension of data['filename'] is replaced by ".jpg".
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG
    """
    img_path = os.path.splitext(
        os.path.join(image_subdirectory, data['filename']))[0] + ".jpg"
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if image.mode != 'RGB':
        image = image.convert('RGB')
    # generate hash key for image
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    difficult_obj = []
    # Images without any annotated object have no 'object' key; treat them
    # as having zero boxes instead of failing with KeyError.
    for obj in data.get('object', []):
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        bndbox = obj['bndbox']
        xmin.append(float(bndbox['xmin']) / width)
        ymin.append(float(bndbox['ymin']) / height)
        xmax.append(float(bndbox['xmax']) / width)
        ymax.append(float(bndbox['ymax']) / height)
        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        # Label-map ids are shifted down by one before being stored.
        classes.append(int(label_map_dict[class_name]) - 1)

    filename_bytes = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            tfrecord_utils.int64_feature(height),
            'image/width':
            tfrecord_utils.int64_feature(width),
            'image/filename':
            tfrecord_utils.bytes_feature(filename_bytes),
            'image/source_id':
            tfrecord_utils.bytes_feature(filename_bytes),
            'image/key/sha256':
            tfrecord_utils.bytes_feature(key.encode('utf8')),
            'image/encoded':
            tfrecord_utils.bytes_feature(encoded_jpg),
            'image/format':
            tfrecord_utils.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            tfrecord_utils.float_list_feature(xmin),
            'image/object/bbox/xmax':
            tfrecord_utils.float_list_feature(xmax),
            'image/object/bbox/ymin':
            tfrecord_utils.float_list_feature(ymin),
            'image/object/bbox/ymax':
            tfrecord_utils.float_list_feature(ymax),
            'image/object/class/text':
            tfrecord_utils.bytes_list_feature(classes_text),
            'image/object/class/label':
            tfrecord_utils.int64_list_feature(classes),
            'image/object/difficult':
            tfrecord_utils.int64_list_feature(difficult_obj),
        }))
    return example
def dict_to_tf_example(img_path, labels, sp):
    """Convert an image and its quadrilateral text labels to a tf.Example.

    The image file is read and stored in its encoded form. Each label string
    is split on ``sp`` into ``x0, y0, x1, y1, x2, y2, x3, y3, text``. Labels
    whose text contains "###" are skipped. The remaining corner coordinates
    are parsed as integers and divided by the image width (x) or height (y).
    Every kept quadrilateral gets class id 1.

    Args:
      img_path: Path of the JPEG file to read.
      labels: Iterable of label strings, one per quadrilateral.
      sp: Separator used to split each label string into fields.

    Returns:
      example: The converted tf.Example, or None when no label was kept.

    Raises:
      ValueError: if the file is not a JPEG, or a label has fewer than nine
        fields or a coordinate that cannot be parsed as an integer.
    """
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if image.mode != 'RGB':
        image = image.convert('RGB')

    width, height = image.size

    x0 = []
    y0 = []
    x1 = []
    y1 = []
    x2 = []
    y2 = []
    x3 = []
    y3 = []
    classes = []

    for label in labels:
        _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3, txt = label.split(sp)[:9]

        if "###" in txt:
            continue

        try:
            _x0 = int(_x0)
        except ValueError:
            # Retry without the first character; presumably this handles a
            # byte-order mark on the first field of a file - TODO confirm.
            # Only ValueError is caught so that KeyboardInterrupt and
            # similar are no longer swallowed.
            _x0 = int(_x0[1:])

        _y0, _x1, _y1, _x2, _y2, _x3, _y3 = [
            int(p) for p in [_y0, _x1, _y1, _x2, _y2, _x3, _y3]
        ]

        y0.append(_y0 / height)
        x0.append(_x0 / width)
        y1.append(_y1 / height)
        x1.append(_x1 / width)
        y2.append(_y2 / height)
        x2.append(_x2 / width)
        y3.append(_y3 / height)
        x3.append(_x3 / width)
        classes.append(1)

    # Nothing usable for this image (no labels, or all skipped).
    if not y0:
        return None

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded': tfrecord_utils.bytes_feature(encoded_jpg),
            'image/format': tfrecord_utils.bytes_feature('jpg'.encode('utf8')),
            'image/object/bbox/y0': tfrecord_utils.float_list_feature(y0),
            'image/object/bbox/x0': tfrecord_utils.float_list_feature(x0),
            'image/object/bbox/y1': tfrecord_utils.float_list_feature(y1),
            'image/object/bbox/x1': tfrecord_utils.float_list_feature(x1),
            'image/object/bbox/y2': tfrecord_utils.float_list_feature(y2),
            'image/object/bbox/x2': tfrecord_utils.float_list_feature(x2),
            'image/object/bbox/y3': tfrecord_utils.float_list_feature(y3),
            'image/object/bbox/x3': tfrecord_utils.float_list_feature(x3),
            'image/object/class/label': tfrecord_utils.int64_list_feature(
                classes),
        }))
    return example
Beispiel #6
0
def dict_to_tf_example(img_path, labels, image_size=None):
    """Convert an image and its quadrilateral labels to a tf.Example.

    All coordinates are normalized to [0~1] by the size of the image as read
    from disk, so they stay valid when the image is resized. For each
    quadrilateral the four corners are stored, plus the axis-aligned box
    around them as center (cx, cy) and size (w, h). Every quadrilateral gets
    class id 1.

    Args:
      img_path: Path of the JPEG file to read.
      labels: Array exposing ``ndim`` and ``shape``. When 3-dimensional it is
        indexed as labels[axis][corner][box]; otherwise as
        labels[axis][corner] and treated as a single quadrilateral. Axis 0
        holds x values, axis 1 holds y values.
      image_size: Optional (width, height). When given with two non-zero
        values, the image is resized to that size and re-encoded as JPEG,
        and the stored width/height are the target size. Otherwise the
        original bytes and size are stored.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the file at img_path is not a JPEG.
    """
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if image.mode != 'RGB':
        image = image.convert('RGB')

    width, height = image.size

    # Gather every quadrilateral as a pair (four x values, four y values) so
    # that both label layouts share the normalization code below.
    if labels.ndim == 3:
        # labels : [2, 4, num_boxes]
        quads = [([labels[0][k][i] for k in range(4)],
                  [labels[1][k][i] for k in range(4)])
                 for i in range(labels.shape[2])]
    else:
        # labels : [2, 4]
        quads = [([labels[0][k] for k in range(4)],
                  [labels[1][k] for k in range(4)])]

    # quadrilateral coordinate: q_x[k] / q_y[k] hold the k-th corner
    q_x = [[], [], [], []]
    q_y = [[], [], [], []]
    # normal coordinate: center and size of the enclosing axis-aligned box
    o_cx = []
    o_cy = []
    o_w = []
    o_h = []

    # Normalization must use the on-disk size, so it happens before
    # width/height are possibly replaced by the resize target below.
    for quad_x, quad_y in quads:
        for k in range(4):
            q_x[k].append(quad_x[k] / width)
            q_y[k].append(quad_y[k] / height)

        x_min, x_max = min(quad_x), max(quad_x)
        y_min, y_max = min(quad_y), max(quad_y)

        o_cx.append((x_min + x_max) / (width * 2))
        o_cy.append((y_min + y_max) / (height * 2))
        o_w.append((x_max - x_min) / width)
        o_h.append((y_max - y_min) / height)

    q_classes = [1] * len(quads)

    # image_size defaults to None, so guard before indexing it.
    if image_size is not None and image_size[0] and image_size[1]:
        width, height = image_size
        # resize() returns a new image, so keep the result and re-encode it;
        # otherwise the stored bytes would not match the stored size.
        # LANCZOS is the same filter as the removed ANTIALIAS alias.
        image = image.resize((width, height), PIL.Image.LANCZOS)
        resized_jpg_io = io.BytesIO()
        image.save(resized_jpg_io, format='JPEG')
        encoded_jpg = resized_jpg_io.getvalue()

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded':
            tfrecord_utils.bytes_feature(encoded_jpg),
            'image/format':
            tfrecord_utils.bytes_feature('jpg'.encode('utf8')),
            'image/width':
            tfrecord_utils.int64_feature(width),
            'image/height':
            tfrecord_utils.int64_feature(height),
            'image/object/bbox/y0':
            tfrecord_utils.float_list_feature(q_y[0]),
            'image/object/bbox/x0':
            tfrecord_utils.float_list_feature(q_x[0]),
            'image/object/bbox/y1':
            tfrecord_utils.float_list_feature(q_y[1]),
            'image/object/bbox/x1':
            tfrecord_utils.float_list_feature(q_x[1]),
            'image/object/bbox/y2':
            tfrecord_utils.float_list_feature(q_y[2]),
            'image/object/bbox/x2':
            tfrecord_utils.float_list_feature(q_x[2]),
            'image/object/bbox/y3':
            tfrecord_utils.float_list_feature(q_y[3]),
            'image/object/bbox/x3':
            tfrecord_utils.float_list_feature(q_x[3]),
            'image/object/class/label':
            tfrecord_utils.int64_list_feature(q_classes),
            'image/object/bbox/cy':
            tfrecord_utils.float_list_feature(o_cy),
            'image/object/bbox/cx':
            tfrecord_utils.float_list_feature(o_cx),
            'image/object/bbox/w':
            tfrecord_utils.float_list_feature(o_w),
            'image/object/bbox/h':
            tfrecord_utils.float_list_feature(o_h),
        }))
    return example
Beispiel #7
0
def dict_to_tf_example(data, image_file_name, image_directory, label_map_dict,
                       coder):
    """Convert an annotation dict and its image file to a tf.Example.

    The image is looked up as ``<image_file_name>.jpg`` in image_directory,
    falling back to ``.jpeg``. Bounding boxes are normalized by the width
    and height recorded in ``data['size']``. Object names are lower-cased
    before being looked up in label_map_dict.

    Args:
      data: dict holding the annotation fields of a single image ('size',
        'filename' and optionally 'object'). The 'name' of each object is
        lower-cased in place.
      image_file_name: Image file name without extension.
      image_directory: Directory holding the image files.
      label_map_dict: A map from lower-case label names to integer ids.
      coder: Not used by this function; kept so the signature is unchanged.

    Returns:
      The converted tf.Example, or 0 when the image file does not exist or
      the recorded width or height is 0.
    """
    full_path = os.path.join(image_directory, image_file_name + '.jpg')
    if not tf.gfile.Exists(full_path):
        # Swap the 'jpg' extension for 'jpeg'.
        full_path = full_path[:-3] + 'jpeg'
    if not tf.gfile.Exists(full_path):
        print('1')
        return 0

    # Read through a context manager so the file handle is closed promptly.
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])
    if width == 0 or height == 0:
        # Boxes cannot be normalized by a zero dimension.
        print('2')
        return 0

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []

    # Annotations without objects simply produce empty box lists.
    for obj in data.get('object', []):
        # NOTE: this rewrites the name inside the caller's dict.
        obj['name'] = obj['name'].lower()

        bndbox = obj['bndbox']
        xmin.append(float(bndbox['xmin']) / width)
        ymin.append(float(bndbox['ymin']) / height)
        xmax.append(float(bndbox['xmax']) / width)
        ymax.append(float(bndbox['ymax']) / height)

        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])

    if len(classes) >= 100:
        print('This image has more than 100 objects :', image_file_name)

    filename_bytes = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            tfrecord_utils.int64_feature(height),
            'image/width':
            tfrecord_utils.int64_feature(width),
            'image/filename':
            tfrecord_utils.bytes_feature(filename_bytes),
            'image/source_id':
            tfrecord_utils.bytes_feature(filename_bytes),
            'image/key/sha256':
            tfrecord_utils.bytes_feature(key.encode('utf8')),
            'image/encoded':
            tfrecord_utils.bytes_feature(encoded_jpg),
            'image/format':
            tfrecord_utils.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            tfrecord_utils.float_list_feature(xmin),
            'image/object/bbox/xmax':
            tfrecord_utils.float_list_feature(xmax),
            'image/object/bbox/ymin':
            tfrecord_utils.float_list_feature(ymin),
            'image/object/bbox/ymax':
            tfrecord_utils.float_list_feature(ymax),
            'image/object/class/text':
            tfrecord_utils.bytes_list_feature(classes_text),
            'image/object/class/label':
            tfrecord_utils.int64_list_feature(classes)
        }))

    return example