예제 #1
0
def __create_tf_example(frame_data, sorted_label_list):
    """Build a tf.train.Example for a single frame.

    The frame's image bytes are decoded with PIL and re-encoded in the
    frame's own format. Boxes parsed from ``frame_data.bboxes_text`` are
    divided by the image size and clamped to [0, 1]. Each label is mapped to
    its position in ``sorted_label_list``.

    Args:
        frame_data: object exposing ``image`` (encoded bytes), ``format``,
            ``filename`` and ``bboxes_text``.
        sorted_label_list: label names; a label's index is used as class id.

    Returns:
        Tuple ``(tf_example, label_counter_for_frame, is_negative)`` where
        the counter tallies the labels of this frame and ``is_negative`` is
        True when the frame has no boxes.
    """

    def _unit(value, extent):
        # Normalize by the image extent, then clamp into [0, 1].
        return max(min(value / extent, 1), 0)

    pil_image = PIL.Image.open(io.BytesIO(frame_data.image))
    # PIL expects 'JPEG' for jpg data; other formats are upper-cased as-is.
    if frame_data.format == 'jpg':
        save_format = 'JPEG'
    else:
        save_format = frame_data.format.upper()
    buffer = io.BytesIO()
    pil_image.save(buffer, format=save_format)
    width, height = pil_image.width, pil_image.height
    encoded_image_data = buffer.getvalue()

    rects, labels = bbox_writer.convert_text_to_rects_and_labels(
        frame_data.bboxes_text)

    # One normalized coordinate per box; rect is read as
    # (left, top, right, bottom).
    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    for rect in rects:
        xmins.append(_unit(rect[0], width))
        xmaxs.append(_unit(rect[2], width))
        ymins.append(_unit(rect[1], height))
        ymaxs.append(_unit(rect[3], height))

    classes_txt = [name.encode('utf-8') for name in labels]
    id_by_label = {name: idx for idx, name in enumerate(sorted_label_list)}
    class_ids = [id_by_label[name] for name in labels]

    encoded_name = frame_data.filename.encode('utf-8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format':
        dataset_util.bytes_feature(frame_data.format.encode('utf-8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_txt),
        'image/object/class/label':
        dataset_util.int64_list_feature(class_ids),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))

    label_counter_for_frame = collections.Counter(labels)
    is_negative = len(rects) == 0
    return tf_example, label_counter_for_frame, is_negative
예제 #2
0
def create_tf_example(group, path):
    """Serialize one image and its labelled boxes as a tf.train.Example.

    Args:
        group: record with a ``filename`` attribute and an ``object`` table
            whose rows carry 'xmin', 'xmax', 'ymin', 'ymax' and 'class'.
        path: directory that contains the image file.

    Returns:
        The populated tf.train.Example. Box coordinates are divided by the
        image width/height read from the file itself.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode only to learn the image dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    encoded_name = group.filename.encode('utf8')
    rows = [row for _, row in group.object.iterrows()]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(
            [row['xmin'] / width for row in rows]),
        'image/object/bbox/xmax': dataset_util.float_list_feature(
            [row['xmax'] / width for row in rows]),
        'image/object/bbox/ymin': dataset_util.float_list_feature(
            [row['ymin'] / height for row in rows]),
        'image/object/bbox/ymax': dataset_util.float_list_feature(
            [row['ymax'] / height for row in rows]),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [row['class'].encode('utf8') for row in rows]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [class_text_to_int(row['class']) for row in rows]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(example, height=600, width=800):
    """Convert one annotated image record into a tf.train.Example.

    Args:
        example: dict with a 'filename' entry (path of the image file) and
            an 'annotations' list. Each annotation is a dict with 'xmin',
            'ymin', 'x_width', 'y_height' and 'class'.
        height: image height in pixels used to normalize y coordinates.
            Defaults to 600, the value previously hard-coded here.
        width: image width in pixels used to normalize x coordinates.
            Defaults to 800, the value previously hard-coded here.

    Returns:
        The populated tf.train.Example. The image size is taken from the
        ``height``/``width`` arguments, not read from the file.
    """
    filename = example['filename'].encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode()

    xmins = []  # normalized left x coordinate, one per box
    xmaxs = []  # normalized right x coordinate, one per box
    ymins = []  # normalized top y coordinate, one per box
    ymaxs = []  # normalized bottom y coordinate, one per box
    classes_text = []  # class name, one per box
    classes = []  # integer class id, one per box

    for box in example['annotations']:
        # Convert to float before dividing so integer pixel values are never
        # truncated by integer division.
        left = float(box['xmin'])
        top = float(box['ymin'])
        xmins.append(left / width)
        xmaxs.append((left + box['x_width']) / width)
        ymins.append(top / height)
        ymaxs.append((top + box['y_height']) / height)
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example
예제 #4
0
def group_to_tf_record(point, image_directory):
    """Convert one annotated data point into a tf.train.Example.

    Args:
        point: dict with an 'id' string and an 'annotations' list. Each
            annotation is a dict with 'x0', 'x1', 'y0', 'y1', 'class_num'
            and 'label'.
        image_directory: directory containing the image files.

    Returns:
        The populated tf.train.Example, or None when the image cannot be
        opened or read. Box coordinates are stored as given (converted to
        float only); no normalization by image size is applied here.
    """
    image_id = point['id']

    # Ids starting with 'frame' are looked up as .png, all others as .jpg.
    if image_id.startswith('frame'):
        filename = os.path.join(image_directory, image_id + '.png')
        image_format = b'png'
    else:
        filename = os.path.join(image_directory, image_id + '.jpg')
        image_format = b'jpeg'

    try:
        # The context manager releases the file handle PIL holds open.
        with Image.open(filename) as image:
            width, height = image.size
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_image = bytes(fid.read())
    except Exception:
        # Best effort: unreadable images are skipped. Unlike a bare except,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return None

    key = hashlib.sha256(encoded_image).hexdigest()

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    class_nums = []
    class_ids = []
    for anno in point['annotations']:
        xmins.append(float(anno['x0']))
        xmaxs.append(float(anno['x1']))
        ymins.append(float(anno['y0']))
        ymaxs.append(float(anno['y1']))
        class_nums.append(anno['class_num'])
        class_ids.append(bytes(anno['label'].encode('utf8')))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/filename': dataset_util.bytes_feature(bytes(filename.encode('utf8'))),
        'image/source_id': dataset_util.bytes_feature(bytes(image_id.encode('utf8'))),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        # 'class/text' carries the label strings, 'class/label' the numbers.
        'image/object/class/text': dataset_util.bytes_list_feature(class_ids),
        'image/object/class/label': dataset_util.int64_list_feature(class_nums)
    }))
    return tf_example
def create_tfdatapoint(file_loc, file, labels):
    """Build a tf.train.Example from an image and its label text file.

    The image is read from ``<file_loc>/images/<file>`` and the annotations
    from ``<file_loc>/labels/<stem>.txt``. Each annotation row has five
    numbers: class index, x center, y center, width, height. Centers and
    sizes are turned into min/max corners without further scaling.

    Args:
        file_loc: dataset root containing 'images' and 'labels' folders.
        file: image file name inside the 'images' folder.
        labels: sequence (or mapping) from class index to class name.

    Returns:
        The populated tf.train.Example.
    """
    image_path = os.path.join(file_loc, 'images', file)
    # Both handles are closed deterministically instead of being leaked.
    with Image.open(image_path) as img:
        (width, height) = img.size
    with tf.io.gfile.GFile(image_path, "rb") as fid:
        encoded = bytes(fid.read())
    image_format = b'png'

    # NOTE(review): the stem is everything before the FIRST dot, so a name
    # such as 'a.b.png' maps to 'a' -- confirm file names contain one dot.
    filename = file.split('.')[0]
    data = np.genfromtxt(os.path.join(file_loc, 'labels', filename + '.txt'))
    # A single-row file is parsed as a 1-D array; force (rows, 5).
    data = data.reshape(-1, 5)

    classes = [int(x) for x in data[:, 0]]
    classes_text = [labels[x].encode('utf8') for x in classes]
    half_w = data[:, 3] / 2.0
    half_h = data[:, 4] / 2.0
    xmins = data[:, 1] - half_w
    xmaxs = data[:, 1] + half_w
    ymins = data[:, 2] - half_h
    ymaxs = data[:, 2] + half_h

    encoded_name = str.encode(filename)
    tf_label_and_data = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(encoded_name),
            'image/source_id':
            dataset_util.bytes_feature(encoded_name),
            'image/encoded':
            dataset_util.bytes_feature(encoded),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_label_and_data
예제 #6
0
def create_tf_example(filename):
    """Build a single-box tf.train.Example from an image file.

    The bounding box is parsed from the file name: after dropping the
    directory and extension, the third '-'-separated field must look like
    ``"<xmin>&<ymin>_<xmax>&<ymax>"`` (integer pixel values).

    Args:
        filename: path of the image file; '/' is the path separator used
            when extracting the base name.

    Returns:
        A tf.train.Example with one box normalized by the image size,
        class text 'vehicle plate' and class label 2.
    """
    base_name = filename.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    coordinates = base_name.split('-')[2]
    (xmin, ymin), (xmax, ymax) = [[int(value) for value in corner.split('&')]
                                  for corner in coordinates.split('_')]

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode only to learn the image dimensions.
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    height = image.height
    width = image.width

    ymins = [float(ymin) / height]
    xmins = [float(xmin) / width]
    ymaxs = [float(ymax) / height]
    xmaxs = [float(xmax) / width]

    labels_text = ['vehicle plate'.encode('utf8')]
    labels = [2]

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(labels_text),
        'image/object/class/label': dataset_util.int64_list_feature(labels),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
예제 #7
0
def dict_to_coco_example(img_data):
    """Convert the dictionary describing one image into a tf.Example proto.

    Args:
        img_data: dict with the keys 'bboxes' (4-element boxes), 'labels',
            'text', 'height', 'width', 'pixel_data' (encoded image bytes)
            and 'file'.

    Returns:
        example: The converted tf.Example.
    """
    boxes = list(img_data['bboxes'])
    # NOTE(review): minima are read from indices 2/3 and maxima from
    # indices 0/1 -- confirm this matches the caller's bbox layout.
    xmin = [box[2] for box in boxes]
    xmax = [box[0] for box in boxes]
    ymin = [box[3] for box in boxes]
    ymax = [box[1] for box in boxes]

    feature = {
        'image/height': dataset_util.int64_feature(img_data['height']),
        'image/width': dataset_util.int64_feature(img_data['width']),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
        dataset_util.int64_list_feature(img_data['labels']),
        'image/object/class/text':
        dataset_util.bytes_list_feature(img_data['text']),
        'image/encoded':
        dataset_util.bytes_feature(img_data['pixel_data']),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        'image/object/class/file':
        dataset_util.bytes_feature(img_data['file'].encode('utf-8')),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
예제 #8
0
def create_tf_example(height, width, filename, encoded_image_data,
                      image_format, xmins, xmaxs, ymins, ymaxs, classes_text,
                      classes):
    """Pack already-prepared image and box data into a tf.train.Example.

    Args:
        height: image height.
        width: image width.
        filename: image file name as bytes; also stored as the source id.
        encoded_image_data: encoded image bytes.
        image_format: image format as bytes, e.g. b'jpeg' or b'png'.
        xmins: normalized left x coordinates, one per box.
        xmaxs: normalized right x coordinates, one per box.
        ymins: normalized top y coordinates, one per box.
        ymaxs: normalized bottom y coordinates, one per box.
        classes_text: class names as bytes, one per box.
        classes: integer class ids, one per box.

    Returns:
        The populated tf.train.Example.
    """
    as_int = dataset_util.int64_feature
    as_bytes = dataset_util.bytes_feature
    as_floats = dataset_util.float_list_feature

    feature = {
        'image/height': as_int(height),
        'image/width': as_int(width),
        'image/filename': as_bytes(filename),
        'image/source_id': as_bytes(filename),
        'image/encoded': as_bytes(encoded_image_data),
        'image/format': as_bytes(image_format),
        'image/object/bbox/xmin': as_floats(xmins),
        'image/object/bbox/xmax': as_floats(xmaxs),
        'image/object/bbox/ymin': as_floats(ymins),
        'image/object/bbox/ymax': as_floats(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
예제 #9
0
def create_tf_example(csv, img_dir):
    """Build a single-box tf.train.Example from one CSV row.

    Args:
        csv: row laid out as ``[file name, x1, y1, x2, y2, class index]``.
        img_dir: directory that contains the image file.

    Returns:
        The populated tf.train.Example. The box is divided by the image
        size read from the file, and the class name comes from
        ``config.CLASS_NAMES``.
    """
    img_fname = csv[0]
    x1, y1, x2, y2 = [int(field) for field in csv[1:-1]]
    class_index = int(csv[-1])
    class_name = config.CLASS_NAMES[class_index].encode('utf8')

    with tf.gfile.GFile(os.path.join(img_dir, img_fname), 'rb') as fid:
        encoded_jpg = fid.read()
    # Decode only to learn the image dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    encoded_name = img_fname.encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature([x1 / width]),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature([x2 / width]),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature([y1 / height]),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature([y2 / height]),
        'image/object/class/text':
        dataset_util.bytes_list_feature([class_name]),
        'image/object/class/label':
        dataset_util.int64_list_feature([class_index]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(group, path):
    """Serialize one labelled image as a tf.train.Example.

    The image is read from ``<path>/<group.label>/<group.filename>`` and
    class ids are looked up in the mapping returned by
    ``class_img_dict(path)``.

    Args:
        group: record with ``label`` and ``filename`` attributes and an
            ``object`` table whose rows carry 'xmin', 'xmax', 'ymin', 'ymax'
            and 'class'.
        path: dataset root directory.

    Returns:
        The populated tf.train.Example. Box coordinates are divided by the
        image width/height read from the file.
    """
    # Mapping from class name to numeric label.
    class_dict = class_img_dict(path)

    image_path = os.path.join(
        path, '{}/{}'.format(group.label, group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # Decode only to learn the image dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    encoded_name = group.filename.encode('utf8')
    rows = [row for _, row in group.object.iterrows()]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(
            [row['xmin'] / width for row in rows]),
        'image/object/bbox/xmax': dataset_util.float_list_feature(
            [row['xmax'] / width for row in rows]),
        'image/object/bbox/ymin': dataset_util.float_list_feature(
            [row['ymin'] / height for row in rows]),
        'image/object/bbox/ymax': dataset_util.float_list_feature(
            [row['ymax'] / height for row in rows]),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [row['class'].encode('utf8') for row in rows]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [class_dict[row['class']] for row in rows]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
예제 #11
0
def create_tf_example(example):
    """Build a tf.train.Example for one event recording with a single box.

    Args:
        example: 5-tuple ``(obj_type, fileidx, annotations, data, sh)``.
            The last element is ignored; the stored shape is always
            ``[304, 240, 2]``. ``annotations[1..4]`` are written as
            ymin, xmin, ymax, xmax respectively.

    Returns:
        The populated tf.train.Example. The 'image/indices*' and
        'image/values' features are filled with freshly drawn random
        numbers, not with values derived from ``data``.
    """
    obj_type, fileidx, annotations, data, _ = example

    height = 240
    width = 304
    encoded_name = (fileidx + 'npy.gz').encode()

    # 1000 random (x, y, channel) triples inside the [304, 240, 2] volume.
    coords = np.int64(np.random.random((1000, 3)) * [width, height, 2])
    column0, column1, column2 = coords.T
    flat_indices = coords.flatten().tolist()

    shape = np.array([width, height, 2]).astype(np.int64)

    # One random float32 value per flattened index entry.
    random_values = np.random.random(len(flat_indices)) * 256
    values = random_values.astype(np.float32).flatten().tolist()

    # NOTE(review): box values are stored as given -- confirm the caller
    # already normalized them.
    feature = {
        'image/indices': dataset_util.int64_list_feature(flat_indices),
        'image/indices0': dataset_util.int64_list_feature(column0),
        'image/indices1': dataset_util.int64_list_feature(column1),
        'image/indices2': dataset_util.int64_list_feature(column2),
        'image/values': dataset_util.float_list_feature(values),
        'image/shape': dataset_util.int64_list_feature(shape),
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(data),
        'image/format': dataset_util.bytes_feature(b'vEvent'),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature([annotations[2]]),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature([annotations[4]]),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature([annotations[1]]),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature([annotations[3]]),
        'image/object/class/text':
        dataset_util.bytes_list_feature([str.encode(obj_type)]),
        'image/object/class/label':
        dataset_util.int64_list_feature([classesid[obj_type]]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False,
                      keypoint_annotations_dict=None,
                      densepose_annotations_dict=None,
                      remove_non_person_annotations=False,
                      remove_non_person_images=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
        u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
        coordinates in the official COCO dataset are given as [x, y, width,
        height] tuples using absolute coordinates where x, y represent the
        top-left (0-indexed) corner.  This function converts to the format
        expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
        size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed by the
      'id' field of each category.  See the label_map_util.create_category_index
      function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    keypoint_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
      keypoint information for this person object annotation. If None, then
      no keypoint annotations will be populated.
    densepose_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
      representing part surface coordinates. For more information see
      http://densepose.org/.
    remove_non_person_annotations: Whether to remove any annotations that are
      not the "person" class.
    remove_non_person_images: Whether to remove any images that do not contain
      at least one "person" annotation.

  Returns:
    key: SHA256 hash of the image.
    example: The converted tf.Example, or None when remove_non_person_images
      is set and no kept annotation is a "person".
    num_annotations_skipped: Number of (invalid) annotations that were ignored.
    num_keypoint_annotation_skipped: Number of keypoint annotations that were
      skipped.
    num_densepose_annotation_skipped: Number of DensePose annotations that were
      skipped.

  Raises:
    No exception is raised explicitly in this function; errors from reading
    or decoding the image file propagate to the caller.
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    # Rebinds the `image` parameter to the decoded PIL image; the decoded
    # image is not referenced again below.
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Per-object accumulators; one entry is appended per kept annotation.
    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    # Collected per annotation but not written to feature_dict below.
    category_ids = []
    area = []
    encoded_mask_png = []
    # Keypoint accumulators: extended by len(_COCO_KEYPOINT_NAMES) entries
    # when an annotation has no keypoint record.
    keypoints_x = []
    keypoints_y = []
    keypoints_visibility = []
    keypoints_name = []
    num_keypoints = []
    include_keypoint = keypoint_annotations_dict is not None
    num_annotations_skipped = 0
    num_keypoint_annotation_used = 0
    num_keypoint_annotation_skipped = 0
    # DensePose accumulators (flattened across objects; dp_num_points holds
    # the per-object point count).
    dp_part_index = []
    dp_x = []
    dp_y = []
    dp_u = []
    dp_v = []
    dp_num_points = []
    # A DensePose record is only used when it has every one of these keys.
    densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
    include_densepose = densepose_annotations_dict is not None
    num_densepose_annotation_used = 0
    num_densepose_annotation_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Skip degenerate boxes.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        # Skip boxes that extend past the right or bottom image border.
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        category_name = category_index[category_id]['name'].encode('utf8')
        if remove_non_person_annotations and category_name != b'person':
            num_annotations_skipped += 1
            continue
        # Convert absolute [x, y, w, h] into normalized corner coordinates.
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(category_name)
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            # For non-crowd objects the decoded mask is reduced along axis 2
            # (presumably one slice per polygon -- TODO confirm).
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

        if include_keypoint:
            annotation_id = object_annotations['id']
            if annotation_id in keypoint_annotations_dict:
                num_keypoint_annotation_used += 1
                keypoint_annotations = keypoint_annotations_dict[annotation_id]
                keypoints = keypoint_annotations['keypoints']
                num_kpts = keypoint_annotations['num_keypoints']
                # `keypoints` is a flat list read in strides of three:
                # x, y, visibility.
                keypoints_x_abs = keypoints[::3]
                keypoints_x.extend(
                    [float(x_abs) / image_width for x_abs in keypoints_x_abs])
                keypoints_y_abs = keypoints[1::3]
                keypoints_y.extend(
                    [float(y_abs) / image_height for y_abs in keypoints_y_abs])
                keypoints_visibility.extend(keypoints[2::3])
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(num_kpts)
            else:
                # No keypoint record: pad with zeros so every object still
                # contributes the same number of keypoint entries.
                keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(0)

        if include_densepose:
            annotation_id = object_annotations['id']
            if (annotation_id in densepose_annotations_dict
                    and all(key in densepose_annotations_dict[annotation_id]
                            for key in densepose_keys)):
                dp_annotations = densepose_annotations_dict[annotation_id]
                num_densepose_annotation_used += 1
                dp_num_points.append(len(dp_annotations['dp_I']))
                dp_part_index.extend([
                    int(i - _DP_PART_ID_OFFSET) for i in dp_annotations['dp_I']
                ])
                # DensePose surface coordinates are defined on a [256, 256] grid
                # relative to each instance box (i.e. absolute coordinates in range
                # [0., 256.]). The following converts the coordinates
                # so that they are expressed in normalized image coordinates.
                dp_x_box_rel = [
                    clip_to_unit(val / 256.) for val in dp_annotations['dp_x']
                ]
                dp_x_norm = [(float(x) + x_box_rel * width) / image_width
                             for x_box_rel in dp_x_box_rel]
                dp_y_box_rel = [
                    clip_to_unit(val / 256.) for val in dp_annotations['dp_y']
                ]
                dp_y_norm = [(float(y) + y_box_rel * height) / image_height
                             for y_box_rel in dp_y_box_rel]
                dp_x.extend(dp_x_norm)
                dp_y.extend(dp_y_norm)
                dp_u.extend(dp_annotations['dp_U'])
                dp_v.extend(dp_annotations['dp_V'])
            else:
                # No usable DensePose record for this object.
                dp_num_points.append(0)

    # Early exit: the keypoint/DensePose skipped counters are still 0 here
    # because they are only computed further below.
    if (remove_non_person_images
            and not any(name == b'person' for name in category_names)):
        return (key, None, num_annotations_skipped,
                num_keypoint_annotation_skipped,
                num_densepose_annotation_skipped)
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    # Optional feature groups are added only when the caller asked for them.
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    if include_keypoint:
        feature_dict['image/object/keypoint/x'] = (
            dataset_util.float_list_feature(keypoints_x))
        feature_dict['image/object/keypoint/y'] = (
            dataset_util.float_list_feature(keypoints_y))
        feature_dict['image/object/keypoint/num'] = (
            dataset_util.int64_list_feature(num_keypoints))
        feature_dict['image/object/keypoint/visibility'] = (
            dataset_util.int64_list_feature(keypoints_visibility))
        feature_dict['image/object/keypoint/text'] = (
            dataset_util.bytes_list_feature(keypoints_name))
        # Skipped = every entry of the dict passed in that was not matched
        # by a kept annotation of this image.
        num_keypoint_annotation_skipped = (len(keypoint_annotations_dict) -
                                           num_keypoint_annotation_used)
    if include_densepose:
        feature_dict['image/object/densepose/num'] = (
            dataset_util.int64_list_feature(dp_num_points))
        feature_dict['image/object/densepose/part_index'] = (
            dataset_util.int64_list_feature(dp_part_index))
        feature_dict['image/object/densepose/x'] = (
            dataset_util.float_list_feature(dp_x))
        feature_dict['image/object/densepose/y'] = (
            dataset_util.float_list_feature(dp_y))
        feature_dict['image/object/densepose/u'] = (
            dataset_util.float_list_feature(dp_u))
        feature_dict['image/object/densepose/v'] = (
            dataset_util.float_list_feature(dp_v))
        # Same accounting as for keypoints: dict size minus records used.
        num_densepose_annotation_skipped = (len(densepose_annotations_dict) -
                                            num_densepose_annotation_used)

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return (key, example, num_annotations_skipped,
            num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
예제 #13
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    The image file is read as raw bytes and stored unchanged; it is not
    decoded or validated here, and 'image/format' is always written as
    'jpeg'.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list:
        list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
        u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner.  This function converts
        to the format expected by the Tensorflow Object Detection API (which
        is [ymin, xmin, ymax, xmax] with coordinates normalized relative
        to image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed
        by the 'id' field of each category.  See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
    Returns:
      key: SHA256 hex digest of the encoded image bytes.
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored, i.e. boxes with non-positive width/height or boxes that
        extend past the right or bottom image edge.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Degenerate boxes carry no usable supervision.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        # Boxes reaching past the image would normalize to values above 1.
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                # Non-crowd masks are collapsed along the last axis into a
                # single 2-D mask before PNG encoding.
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        # The numeric ids were previously collected but never written.
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
예제 #14
0
def create_tf_example(filename, label_file):
    """Builds a tf.Example from an image file and a space-delimited label file.

    The first row of ``label_file`` is treated as a header and skipped.  In
    every following row, columns 0-3 are read as xmin, ymin, xmax, ymax in
    pixels and the last column as the class name.  Box coordinates are
    normalized by the image width and height reported by OpenCV.

    Args:
      filename: Path to the image file.  Stored as both 'image/filename' and
        'image/source_id'.
      label_file: Path to the label file for this image.

    Returns:
      The tf.train.Example for the image and its boxes.

    Raises:
      ValueError: if OpenCV cannot read the image at ``filename``.
    """
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError('Could not read image: {}'.format(filename))
    height, width = img.shape[:2]

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image_data = fid.read()

    image_format = b'jpg'
    # Bytes features reject text strings, so encode anything that is not
    # already bytes.
    if isinstance(filename, bytes):
        filename_bytes = filename
    else:
        filename_bytes = filename.encode('utf8')

    xmins = []  # Normalized left x coordinates (1 per box)
    xmaxs = []  # Normalized right x coordinates (1 per box)
    ymins = []  # Normalized top y coordinates (1 per box)
    ymaxs = []  # Normalized bottom y coordinates (1 per box)
    classes_text = []  # Class names as bytes (1 per box)
    classes = []  # Integer class ids (1 per box)

    with open(label_file, 'r') as f:
        csvreader = csv.reader(f, delimiter=' ')
        next(csvreader, None)  # skip the header row

        for row in csvreader:
            if not row:
                # Blank lines (e.g. a trailing newline) carry no box.
                continue

            name = row[-1]
            if isinstance(name, bytes):
                classes_text.append(name)
            else:
                classes_text.append(name.encode('utf8'))
            classes.append(get_index(name))

            xmins.append(float(row[0]) / width)
            xmaxs.append(float(row[2]) / width)
            ymins.append(float(row[1]) / height)
            ymaxs.append(float(row[3]) / height)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename_bytes),
            'image/source_id':
            dataset_util.bytes_feature(filename_bytes),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
예제 #15
0
def background_tf_example(image_path, ):
    """Builds a tf.Example for an image that carries no object annotations.

    Every per-object feature (boxes, class text/label, difficult, truncated,
    view) is written as an empty list.

    Args:
      image_path: Full path to image file

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if PIL does not report the file's format as 'JPEG'.
    """
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        jpeg_bytes = image_file.read()

    decoded = PIL.Image.open(io.BytesIO(jpeg_bytes))
    if decoded.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    sha256_hex = hashlib.sha256(jpeg_bytes).hexdigest()
    # Last '/'-separated component of the path is used as the file name.
    base_name = image_path.split('/')[-1]

    features = {
        'image/height': dataset_util.int64_feature(decoded.height),
        'image/width': dataset_util.int64_feature(decoded.width),
        'image/filename': dataset_util.bytes_feature(
            base_name.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            base_name.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(
            sha256_hex.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        # No objects: all per-object lists are empty.
        'image/object/bbox/xmin': dataset_util.float_list_feature([]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([]),
        'image/object/class/text': dataset_util.bytes_list_feature([]),
        'image/object/class/label': dataset_util.int64_list_feature([]),
        'image/object/difficult': dataset_util.int64_list_feature([]),
        'image/object/truncated': dataset_util.int64_list_feature([]),
        'image/object/view': dataset_util.bytes_list_feature([]),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
예제 #16
0
def xml_to_tf(path_input, path_output):
    """Converts every ``.xml`` annotation in a directory into one TFRecord.

    For each ``.xml`` file in ``path_input`` the annotation is parsed, the
    image it names is read from the same directory, and one tf.Example is
    appended to the TFRecord at ``path_output``.

    The XML is read by child position, not by tag name: the second child of
    the root is taken as the image filename, the first two children of the
    fifth root child as width and height, and for every ``object`` element
    the first child as the class name and the first four children of its
    fifth child as xmin, ymin, xmax, ymax.
    NOTE(review): this appears to match the Pascal-VOC layout written by
    common labelling tools - confirm before feeding other XML variants.

    Args:
      path_input: Directory holding the ``.xml`` files and their images.
      path_output: Path of the TFRecord file to write.
    """
    print(path_output)
    writer = tf.io.TFRecordWriter(path_output)

    # try/finally so the record file is flushed and closed even when one of
    # the annotations or images cannot be processed.
    try:
        for entry in os.listdir(path_input):
            if not entry.endswith(".xml"):
                continue

            # Joined (not concatenated) so path_input works with or without
            # a trailing separator, matching how the image path is built.
            tree = ET.parse(os.path.join(path_input, entry))
            root = tree.getroot()

            filename = root[1].text
            width = int(root[4][0].text)
            height = int(root[4][1].text)

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []
            classes = []

            for member in root.findall('object'):
                class_name = member[0].text
                xmin = int(member[4][0].text)
                ymin = int(member[4][1].text)
                xmax = int(member[4][2].text)
                ymax = int(member[4][3].text)

                xmins.append(xmin / width)
                xmaxs.append(xmax / width)
                ymins.append(ymin / height)
                ymaxs.append(ymax / height)
                classes_text.append(class_name.encode('utf8'))
                classes.append(class_text_to_int(class_name))

            with tf.io.gfile.GFile(
                    os.path.join(path_input, '{}'.format(filename)),
                    'rb') as fid:
                encoded_jpg = fid.read()

            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height':
                    dataset_util.int64_feature(height),
                    'image/width':
                    dataset_util.int64_feature(width),
                    'image/filename':
                    dataset_util.bytes_feature(filename.encode('utf8')),
                    'image/source_id':
                    dataset_util.bytes_feature(filename.encode('utf8')),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_jpg),
                    'image/format':
                    dataset_util.bytes_feature(IMAGE_FORMAT),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text':
                    dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
                }))

            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
    output_path = os.path.join(os.getcwd(), path_output)
    print('Successfully created the TFRecords: {}'.format(output_path))
예제 #17
0
    def createTFExample(self):
        """Convert the annotation XML at ``self.xml`` to a tf.Example proto.

        Notice that this function normalizes the bounding box coordinates
        provided by the raw data, using the width and height recorded in the
        XML.  Objects whose name is not in ``classes`` or that fail
        ``self.__isValidBox`` are reported on stdout and left out.

        Side effect: a copy of the image with the accepted boxes drawn on it
        is saved beside the image at ``data['path']``, with ``-with-boxes``
        inserted before the file extension.

        Args: None
        Returns:
            example: The converted tf.Example.
        """
        import os  # local import: only needed to build the preview path

        with tf.io.gfile.GFile(self.xml, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        # the image might be processed in a different location
        # so overwrite the path to the input image path for consistency
        data['path'] = self.jpg if self.crop == '' else self.__cropImage(data)

        print(f"Processing image {data['path']}")

        width = int(data['size']['width'])
        height = int(data['size']['height'])
        filename = data['filename'].encode('utf8')
        with tf.io.gfile.GFile(data['path'], 'rb') as fid:
            encoded_image_data = fid.read()
        image_format = 'jpeg'.encode('utf8')

        xmins = []  # Normalized left x coordinates (1 per box)
        xmaxs = []  # Normalized right x coordinates (1 per box)
        ymins = []  # Normalized top y coordinates (1 per box)
        ymaxs = []  # Normalized bottom y coordinates (1 per box)
        classes_text = []  # Class names as bytes (1 per box)
        classes_id = []  # Integer class ids (1 per box)

        image = util.loadImage(data['path'])

        # An annotation without any <object> has no 'object' key at all;
        # treat that as "no boxes" instead of failing with KeyError.
        for obj in data.get('object', []):
            if obj['name'] not in classes or not self.__isValidBox(
                    obj, width, height):
                print('Unexpected object: ' + str(obj) + ' in ' + data['path'])
                continue
            xmins.append(float(obj['bndbox']['xmin']) / width)
            ymins.append(float(obj['bndbox']['ymin']) / height)
            xmaxs.append(float(obj['bndbox']['xmax']) / width)
            ymaxs.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes_id.append(getClassID(obj['name']))
            util.drawBox(image, self.__encodeBox(obj['bndbox']))

        # Split off the extension instead of replacing the ".jpg" substring:
        # a path without a lowercase ".jpg" would otherwise map to itself and
        # the annotated preview would overwrite the source image.
        preview_root, preview_ext = os.path.splitext(str(data['path']))
        util.saveImage(image, preview_root + "-with-boxes" + preview_ext)

        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(filename),
                'image/source_id':
                dataset_util.bytes_feature(filename),
                'image/encoded':
                dataset_util.bytes_feature(encoded_image_data),
                'image/format':
                dataset_util.bytes_feature(image_format),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes_id),
            }))
        return tf_example
예제 #18
0
def toTfrecord(f, pathTofile):
    """Reads one image entry from an open annotation file into a tf.Example.

    Consumes from ``f``, in order: one line with the image filename, one line
    with the number of boxes, and then that many box lines.  Each box line is
    split on whitespace and its first four fields are used as x, y, width and
    height in pixels.  Only boxes wider than 25 and taller than 30 are kept;
    kept boxes are normalized by the image size and clamped to
    [0.005, 0.995].  Every kept box gets class text "face" and label 0.

    Args:
      f: Open text file positioned at the start of an image entry.
      pathTofile: Directory that the filename read from ``f`` is relative to.

    Returns:
      The tf.train.Example for the image.
    """
    filename = f.readline().rstrip()
    print(filename)
    full_path = os.path.join(pathTofile, filename)
    print(full_path)
    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # The opened image is never used; the call is kept so data that PIL
    # cannot identify as an image still fails at this point.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    image_raw = cv2.imread(full_path)
    key = hashlib.sha256(encoded_jpg).hexdigest()
    height, width, channel = image_raw.shape
    print("height is %d, width is %d, channel is %d" %
          (height, width, channel))

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text = []
    classes = []

    face_num = int(f.readline().rstrip())
    print("face_num:>>", face_num)
    for _ in range(face_num):
        fields = f.readline().rstrip().split()
        # Drop boxes that are not both wide and tall enough.
        if not (float(fields[2]) > 25.0 and float(fields[3]) > 30.0):
            continue

        left = float(fields[0])
        top = float(fields[1])
        box_w = float(fields[2])
        box_h = float(fields[3])

        # Some annotations exceed the image boundary, hence the clamping.
        xmins.append(max(0.005, left / width))
        ymins.append(max(0.005, top / height))
        xmaxs.append(min(0.995, (left + box_w) / width))
        ymaxs.append(min(0.995, (top + box_h) / height))
        classes_text.append("face".encode('utf8'))
        classes.append(0)
        print(xmins[-1], ymins[-1], xmaxs[-1], ymaxs[-1],
              classes_text[-1], classes[-1])

    encoded_name = filename.encode('utf8')
    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(value=classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
    }
    print("xxxxx", xmins)
    return tf.train.Example(features=tf.train.Features(feature=feature_dict))
예제 #19
0
def prepare_example(image_path, annotations, label_map_dict):
    """Builds a tf.Example from an image file and its 2D box annotations.

    The image is decoded only to obtain its height and width; the bytes read
    from disk are stored unchanged under 'image/encoded'.

    Args:
      image_path: The complete path to image.
      annotations: Dictionary with the keys '2d_bbox_left', '2d_bbox_top',
        '2d_bbox_right', '2d_bbox_bottom' (pixel coordinates) and 'type'
        (label names).  Presumably array-like values with one entry per
        object - TODO confirm against the caller.
      label_map_dict: A map from string label names to integer ids.

    Returns:
      example: The converted tf.Example.
    """
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        png_bytes = image_file.read()
    pixels = np.asarray(pil.open(io.BytesIO(png_bytes)))

    height = int(pixels.shape[0])
    width = int(pixels.shape[1])

    # Normalize the pixel coordinates by the image size.
    left = (annotations['2d_bbox_left']) / float(width)
    top = (annotations['2d_bbox_top']) / float(height)
    right = (annotations['2d_bbox_right']) / float(width)
    bottom = (annotations['2d_bbox_bottom']) / float(height)

    label_ids = [label_map_dict[name] for name in annotations['type']]

    # Not written by this function: source_id, key/sha256, format,
    # class/text, difficult, truncated, alpha and the 3d_bbox/* features.
    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            image_path.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(png_bytes),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/label':
        dataset_util.int64_list_feature(label_ids),
    }

    return tf.train.Example(features=tf.train.Features(feature=features))
예제 #20
0
def dict_to_tf_example(data,
                       dataset_directory,
                       ignore_difficult_instances=False,
                       image_subdirectory='All_Images'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    The image is always loaded from
    ``<dataset_directory>/SSD_Training_Data/All_Images/<data['filename']>``,
    after replacing ``_mp4`` with ``.mp4`` in that path and appending
    ``.jpg`` when the path does not already contain it.

    Every kept object is written with class id 1 and class text 'ferrari',
    regardless of the name in the annotation.  Boxes whose shorter normalized
    side is below 0.008 are dropped and counted.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      dataset_directory: Path to root directory holding the dataset
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      image_subdirectory: Currently ignored; the image directory is
        hard-coded (see above).  Kept for interface compatibility.

    Returns:
      example: The converted tf.Example.
      boxes: List of [xmin, ymin, xmax, ymax] (normalized) for each kept box.
      small_boxes_count: Number of boxes dropped for being too small.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG
    """
    full_path = os.path.join(dataset_directory, 'SSD_Training_Data',
                             'All_Images', data['filename'])
    full_path = full_path.replace('_mp4', '.mp4')
    if '.jpg' not in full_path:
        full_path = full_path + '.jpg'
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    difficult_obj = []
    boxes = []

    small_boxes_count = 0

    # Single-class dataset.  The previous name check compared a lower-cased
    # string with 'Other', which can never match, and both of its branches
    # assigned 1 anyway.
    class_id = 1

    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            xmin_norm = float(obj['bndbox']['xmin']) / width
            ymin_norm = float(obj['bndbox']['ymin']) / height
            xmax_norm = float(obj['bndbox']['xmax']) / width
            ymax_norm = float(obj['bndbox']['ymax']) / height

            # Skip boxes whose shorter side is below the size threshold.
            if min(xmax_norm - xmin_norm, ymax_norm - ymin_norm) < 0.008:
                small_boxes_count += 1
                continue

            difficult_obj.append(int(difficult))

            xmin.append(xmin_norm)
            ymin.append(ymin_norm)
            xmax.append(xmax_norm)
            ymax.append(ymax_norm)

            boxes.append([xmin_norm, ymin_norm, xmax_norm, ymax_norm])

            classes_text.append('ferrari'.encode('utf8'))
            classes.append(class_id)
            truncated.append(int(obj['truncated']))

    # 'image/object/view' (pose) is not written.
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
        }))

    return example, boxes, small_boxes_count
예제 #21
0
def read_xml_make_tfrecord(show_preview=True):
    """Builds sharded train/valid/test TFRecords from images and their masks.

    Despite its name, this function reads no XML.  For every ``.jpg`` entry
    in the global ``folder`` it reads the image and the same-named mask from
    ``mask/``, derives one bounding box from the longest contour of the mask
    and writes a tf.Example holding the image, the box, the class
    ``passport`` (label 1) and a PNG mask in which source-mask pixels equal
    to 255 become 1 and all others 0.

    Entries are routed by their position in the directory listing: the first
    80% are spread over 8 train shards, the next 10% go to the single valid
    shard and the remainder to the single test shard, all under
    ``tfrecord/``.  Non-``.jpg`` entries are skipped but still count towards
    the positions.

    Writers are kept local and always closed, so records are flushed even if
    processing fails part-way.  Images whose mask yields no contour are
    reported on stdout and skipped.  The raw dumps of the contour arrays and
    of the PNG mask bytes that used to be printed are no longer emitted.

    Args:
      show_preview: When true (the default, matching the previous behaviour)
        each cleaned-up mask is shown in an OpenCV window and processing
        blocks until a key is pressed.  Pass False for unattended runs.
    """
    num_data = 8
    num_eval = int(num_data / 8)

    writers = {'train': [], 'test': [], 'valid': []}
    try:
        for split, shard_count in (('train', num_data), ('test', num_eval),
                                   ('valid', num_eval)):
            for i in range(shard_count):
                writers[split].append(
                    tensorflow.io.TFRecordWriter(
                        'tfrecord/{0}/{0}.tfrecord-{1:05d}-of-{2:05d}'.format(
                            split, i, shard_count)))

        entries = os.listdir(folder)
        length = len(entries)

        for number, img_name in enumerate(entries):
            if img_name[-4:] != '.jpg':
                continue
            filename = img_name[:-4]
            image_path = os.path.join(folder, img_name)
            mask_path = 'mask/' + filename + '.jpg'

            img = cv2.imread(image_path)
            height, width = img.shape[:2]

            # Close small holes before contour extraction.
            contour_mask = cv2.imread(mask_path, 0)
            contour_mask = cv2.morphologyEx(contour_mask, cv2.MORPH_CLOSE,
                                            np.ones((5, 5), np.uint8))
            if show_preview:
                cv2.imshow("asdas", contour_mask)
                cv2.waitKey()

            # findContours returns (image, contours, hierarchy) on OpenCV 3
            # and (contours, hierarchy) on OpenCV 4; [-2] is the contour
            # list in both.
            contours = cv2.findContours(contour_mask, cv2.RETR_TREE,
                                        cv2.CHAIN_APPROX_SIMPLE)[-2]
            if len(contours) == 0:
                print('No contour found in mask, skipping: ' + img_name)
                continue
            # Contour with the most points (first one wins on ties).
            largest = max(contours, key=len)

            x = [point[0][0] for point in largest]
            y = [point[0][1] for point in largest]
            xmin = min(x)
            xmax = max(x)
            ymin = min(y)
            ymax = max(y)

            object_name = 'passport'
            pixel_val = 255
            with tensorflow.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_image_data = fid.read()
            key = hashlib.sha256(encoded_image_data).hexdigest()

            with tensorflow.io.gfile.GFile(mask_path, 'rb') as fid:
                encoded_mask_data = fid.read()

            # Re-encode the mask as a 0/1 PNG.
            mask_np = np.asarray(
                Image.open(io.BytesIO(encoded_mask_data)).convert('L'))
            mask_remapped = (mask_np == pixel_val).astype(np.uint8)
            output = io.BytesIO()
            Image.fromarray(mask_remapped).save(output, format='PNG')

            xmins = [xmin / width]
            xmaxs = [xmax / width]
            ymins = [ymin / height]
            ymaxs = [ymax / height]
            classes_text = [object_name.encode('utf8')]
            classes = [1]
            masks = [output.getvalue()]

            print(img_name)
            print(xmins)
            print(xmaxs)
            print(ymins)
            print(ymaxs)
            print(classes_text)
            print(classes)

            example = tensorflow.train.Example(
                features=tensorflow.train.Features(
                    feature={
                        'image/height':
                        dataset_util.int64_feature(height),
                        'image/width':
                        dataset_util.int64_feature(width),
                        'image/filename':
                        dataset_util.bytes_feature(img_name.encode('utf8')),
                        'image/source_id':
                        dataset_util.bytes_feature(img_name.encode('utf8')),
                        'image/key/sha256':
                        dataset_util.bytes_feature(key.encode('utf8')),
                        'image/encoded':
                        dataset_util.bytes_feature(encoded_image_data),
                        'image/format':
                        dataset_util.bytes_feature('jpeg'.encode('utf8')),
                        'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmins),
                        'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmaxs),
                        'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymins),
                        'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymaxs),
                        'image/object/class/text':
                        dataset_util.bytes_list_feature(classes_text),
                        'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                        'image/object/mask':
                        dataset_util.bytes_list_feature(masks),
                    }))

            # Pick the split by position in the listing, then the shard by
            # the relative position inside that split.
            if number < length * 0.8:
                split = 'train'
                shard = int(number / (length * 0.8) * num_data)
            elif number < length * 0.9:
                split = 'valid'
                shard = int(
                    (number - length * 0.8) / (length * 0.1) * num_data / 8)
            else:
                split = 'test'
                shard = int(
                    (number - length * 0.9) / (length * 0.1) * num_data / 8)

            shard_writers = writers[split]
            # Clamp so float rounding at a split boundary cannot index past
            # the last shard.
            shard = min(shard, len(shard_writers) - 1)
            shard_writers[shard].write(example.SerializeToString())
    finally:
        for shard_writers in writers.values():
            for writer in shard_writers:
                writer.close()
예제 #22
0
def create_tf_example(group, path, class_dict):
    """Build a tf.train.Example for one image and its bounding-box rows.

    Args:
      group: object exposing `filename` (image file name, joined onto `path`)
        and `object` (iterated with `iterrows()`, one row per bounding box).
        Each row needs a 'class' entry plus either the
        'xmin_rel'/'xmax_rel'/'ymin_rel'/'ymax_rel' entries (stored as-is) or
        the 'xmin'/'xmax'/'ymin'/'ymax' entries (divided by the image
        width/height before being stored).
      path: directory that contains the image file.
      class_dict: mapping from a row's 'class' value to its integer label id.

    Returns:
      The populated tf.train.Example, or None when PIL cannot open the image.

    Raises:
      ValueError: if a row carries neither the relative nor the absolute
        bounding-box columns.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    try:
        image = Image.open(io.BytesIO(encoded_jpg))
    except Exception as ex:  # best effort: an unreadable image is skipped
        print(ex)
        print('Invalid image, skipping: ', group.filename)
        return None

    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    relative_cols = ('xmin_rel', 'xmax_rel', 'ymin_rel', 'ymax_rel')
    absolute_cols = ('xmin', 'xmax', 'ymin', 'ymax')

    for _, row in group.object.iterrows():
        available = set(row.index)
        if available.issuperset(relative_cols):
            # Relative coordinates take precedence and are used unchanged.
            xmin, xmax, ymin, ymax = (row[col] for col in relative_cols)
        elif available.issuperset(absolute_cols):
            xmin = row['xmin'] / width
            xmax = row['xmax'] / width
            ymin = row['ymin'] / height
            ymax = row['ymax'] / height
        else:
            # Without this branch the coordinates would be unbound on the
            # first row, or silently reuse the previous row's values.
            raise ValueError(
                'Row for {} has neither relative (*_rel) nor absolute '
                'bounding-box columns'.format(group.filename))

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_dict[row['class']])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
예제 #23
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Build a tf.train.Example for one image and its box annotations.

    Args:
      image: dict with 'height', 'width' and 'filename' entries.
      annotations_list: iterable of dicts, one per box, with 'xmin', 'xmax',
        'ymin', 'ymax' (divided by the image width/height here), 'label_text'
        and 'label' entries.
      image_dir: directory that `image['filename']` is joined onto.
      category_index: unused; kept for interface compatibility.
      include_masks: unused; kept for interface compatibility.

    Returns:
      The populated tf.train.Example.
    """
    height = image['height']  # Image height
    width = image['width']  # Image width
    filename = image['filename']  # File name of the image within image_dir

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # The decoded image is not needed; the call is kept so that bytes PIL
    # cannot identify still fail here, as they did before.
    PIL.Image.open(io.BytesIO(encoded_jpg))

    # splitext() returns the extension with its leading dot ('.jpg'); the
    # stored format must be the bare name ('jpg').
    image_format = os.path.splitext(filename)[1].lstrip('.')

    xmins = []  # Normalized left x coordinates (1 per box)
    xmaxs = []  # Normalized right x coordinates (1 per box)
    ymins = []  # Normalized top y coordinates (1 per box)
    ymaxs = []  # Normalized bottom y coordinates (1 per box)
    classes_text = []  # String class name of each box (1 per box)
    classes = []  # Integer class id of each box (1 per box)
    for annotation in annotations_list:
        xmins.append(annotation['xmin'] / width)
        xmaxs.append(annotation['xmax'] / width)
        ymins.append(annotation['ymin'] / height)
        ymaxs.append(annotation['ymax'] / height)
        classes_text.append(annotation['label_text'].encode('utf8'))
        classes.append(annotation['label'])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
예제 #24
0
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Build a tf.train.Example from one image record and its box annotations.

    Args:
      image: dict read for the keys 'height', 'width', 'coco_url', 'id',
        'not_exhaustive_category_ids' and 'neg_category_ids'.
      image_dir: directory containing the image files; the last two path
        components of 'coco_url' are joined onto it to locate the file.
      bbox_annotations: optional list of dicts read for the keys 'bbox',
        'category_id', 'area' and, when `include_mask` is set,
        'segmentation'. Each 'bbox' is unpacked as (x, y, width, height) and
        turned into corner coordinates divided by the image size, with the
        minimum corner floored at 0.0 and the maximum corner capped at 1.0.
      category_index: dict keyed by category id whose values hold a 'name'
        entry; only consulted when `bbox_annotations` is non-empty.
      include_mask: whether to add PNG-encoded instance masks to the result.

    Returns:
      A (success, filename, example) tuple: (False, None, None) when the
      image file does not exist, otherwise True, the file name relative to
      `image_dir`, and the tf.train.Example.
    """
    image_height = image['height']
    image_width = image['width']
    # Keep only the last two components of the URL as the relative file name.
    filename = osp.join(*image['coco_url'].split('/')[-2:])

    image_id = image['id']
    not_exhaustive_ids = image['not_exhaustive_category_ids']
    neg_ids = image['neg_category_ids']

    full_path = os.path.join(image_dir, filename)
    if not tf.gfile.Exists(full_path):
        tf.logging.warn(f'image {full_path} not exists! skip')
        return False, None, None

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    sha256_hex = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        'image/height':
        dataset_util.int64_feature(image_height),
        'image/width':
        dataset_util.int64_feature(image_width),
        'image/filename':
        dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(sha256_hex.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/not_exhaustive_category_ids':
        dataset_util.int64_list_feature(not_exhaustive_ids),
        'image/image_neg_category_ids':
        dataset_util.int64_list_feature(neg_ids),
    }

    if bbox_annotations:
        lefts, rights, tops, bottoms = [], [], [], []
        crowd_flags = []
        names = []
        ids = []
        areas = []
        mask_pngs = []
        for annotation in bbox_annotations:
            x, y, box_width, box_height = annotation['bbox']

            left = max(float(x) / image_width, 0.0)
            right = min(float(x + box_width) / image_width, 1.0)
            top = max(float(y) / image_height, 0.0)
            bottom = min(float(y + box_height) / image_height, 1.0)
            # Drop boxes that are empty or inverted after clamping.
            if right <= left or bottom <= top:
                continue
            lefts.append(left)
            rights.append(right)
            tops.append(top)
            bottoms.append(bottom)

            # Every kept box is recorded as not-crowd.
            crowd_flags.append(0)
            category_id = int(annotation['category_id'])
            ids.append(category_id)
            names.append(category_index[category_id]['name'].encode('utf8'))
            areas.append(annotation['area'])

            if include_mask:
                rle = mask.frPyObjects(annotation['segmentation'],
                                       image_height, image_width)
                # Collapse the decoded masks along axis 2 into one mask.
                merged_mask = np.amax(mask.decode(rle), axis=2)
                png_buffer = io.BytesIO()
                PIL.Image.fromarray(merged_mask).save(png_buffer,
                                                      format='PNG')
                mask_pngs.append(png_buffer.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(lefts),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(rights),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(tops),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(bottoms),
            'image/object/class/text':
            dataset_util.bytes_list_feature(names),
            'image/object/class/label':
            dataset_util.int64_list_feature(ids),
            'image/object/is_crowd':
            dataset_util.int64_list_feature(crowd_flags),
            'image/object/area':
            dataset_util.float_list_feature(areas),
        })
        if include_mask:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(mask_pngs))

    features = tf.train.Features(feature=feature_dict)
    return True, filename, tf.train.Example(features=features)
예제 #25
0
def dict_to_tf_example(data,
                       image_path,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='images'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). Read for
        data['size']['width'], data['size']['height'] and, when present,
        data['object'].
      image_path: Full path to image file
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip objects whose 'difficult'
        field is set (default: False). Objects without a 'difficult' field,
        or with an empty one, count as not difficult.
      image_subdirectory: Unused; the image is read from `image_path`
        directly. Kept for interface compatibility.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by image_path is not a valid JPEG
    """
    full_path = image_path
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    filename = full_path.split('/')[-1]

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    # No pose is recorded, so 'image/object/view' is always an empty list.
    poses = []
    difficult_obj = []
    if 'object' in data:
        for obj in data['object']:
            # Read the flag so ignore_difficult_instances actually has an
            # effect; a missing or empty field falls back to 0.
            difficult = bool(int(obj.get('difficult') or 0))
            if ignore_difficult_instances and difficult:
                continue

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            # The annotation's own 'truncated' value is not read; every kept
            # object is recorded as not truncated.
            truncated.append(0)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
def create_tf_example(image_det, image_path, pdt):
    """Build a tf.train.Example for one image from its detection rows.

    Args:
      image_det: table iterated with `iterrows()`; each row is read for
        'XMin', 'XMax', 'YMin', 'YMax' (stored unchanged) and 'LabelName'.
      image_path: path of the image file; its basename is stored as the
        file name and source id.
      pdt: table with 'labelid' and 'id' columns, used to map a row's
        'LabelName' to its integer class id (first match wins).

    Returns:
      The populated tf.train.Example.
    """
    with tf.gfile.Open(image_path, 'rb') as image_file:
        raw_image_bytes = image_file.read()

    # The image is opened a second time only to read its dimensions.
    with Image.open(image_path) as picture:
        width, height = picture.size

    base_name = os.path.basename(image_path).encode("utf-8")

    # One entry per bounding box in each of the lists below.
    lefts, rights, tops, bottoms = [], [], [], []
    label_names = []
    label_ids = []

    for _, detection in image_det.iterrows():
        left = detection['XMin']
        right = detection['XMax']
        top = detection['YMin']
        bottom = detection['YMax']
        label_id = detection['LabelName']
        encoded_label = label_id.encode("utf-8")
        numeric_id = pdt[pdt['labelid'] == label_id].id.values[0]

        lefts.append(left)
        rights.append(right)
        tops.append(top)
        bottoms.append(bottom)
        label_names.append(encoded_label)
        label_ids.append(numeric_id)

    print("\nimage : {}".format(image_path))
    print("classes : {}".format(label_names))
    print('classes_num : {}\n'.format(label_ids))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(base_name),
        'image/source_id': dataset_util.bytes_feature(base_name),
        'image/encoded': dataset_util.bytes_feature(raw_image_bytes),
        'image/format': dataset_util.bytes_feature(b'jpeg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text':
        dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
        dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
예제 #27
0
    def _create_tf_entry(self, categories, img, label, filename, annotations):
        """Build a tf.train.Example for one image whose boxes share one label.

        Args:
          categories: list of label names; the stored class id is the
            1-based position of `label` in this list.
          img: image object exposing `size` and `save(file, format=...)`
            (presumably a PIL image -- confirm against caller).
          label: label name applied to every bounding box of the image.
          filename: image file name stored in the record.
          annotations: iterable of objects whose `data` is a Rectangle or a
            Polygon; annotations of any other type are ignored.

        Returns:
          The tf.train.Example, or None when no usable bounding box remains.

        Raises:
          ImageMonkeyGeneralError: if a trimmed rectangle has a negative
            left/top/width/height, reaches beyond the image, or yields
            inverted coordinates.
        """
        width, height = img.size

        # Re-encode the image as JPEG in memory.
        jpeg_buffer = io.BytesIO()
        img.save(jpeg_buffer, format='JPEG')
        encoded_jpeg = jpeg_buffer.getvalue()

        xmins, xmaxs, ymins, ymaxs = [], [], [], []

        for annotation in annotations:
            shape = annotation.data
            # Currently only Rect annotations are supported (TODO: change
            # me); a Polygon contributes its `rect`.
            if type(shape) is Rectangle:
                rect = shape
            elif type(shape) is Polygon:
                rect = shape.rect
            else:
                rect = None
            if rect is None:
                continue

            # Scale to the image dimension in case the annotation exceeds
            # the image width/height.
            trimmed = rect.trim(Rectangle(0, 0, width, height))

            if trimmed.left < 0:
                raise ImageMonkeyGeneralError(
                    "trimmed rect left dimension invalid! (<0)")
            if trimmed.top < 0:
                raise ImageMonkeyGeneralError(
                    "trimmed rect top dimension invalid! (<0)")
            if trimmed.width < 0:
                raise ImageMonkeyGeneralError(
                    "trimmed rect width dimension invalid! (<0)")
            if trimmed.height < 0:
                raise ImageMonkeyGeneralError(
                    "trimmed rect height dimension invalid! (<0)")

            right_edge = trimmed.left + trimmed.width
            if right_edge > width:
                raise ImageMonkeyGeneralError(
                    "bounding box width > image width!")
            bottom_edge = trimmed.top + trimmed.height
            if bottom_edge > height:
                raise ImageMonkeyGeneralError(
                    "bounding box height > image height!")

            xmin = trimmed.left / float(width)
            xmax = right_edge / float(width)
            ymin = trimmed.top / float(height)
            ymax = bottom_edge / float(height)

            # Sanity checks.
            if xmin > xmax:
                raise ImageMonkeyGeneralError("xmin > xmax!")
            if ymin > ymax:
                raise ImageMonkeyGeneralError("ymin > ymax!")

            # Skip bounding boxes that are all zero.
            if xmin == 0 and xmax == 0 and ymin == 0 and ymax == 0:
                continue

            xmins.append(xmin)
            xmaxs.append(xmax)
            ymins.append(ymin)
            ymaxs.append(ymax)

        # Some images in the dataset have no annotation; skip those. The
        # four lists always grow together, so checking one is enough.
        if not xmins:
            return None

        box_count = len(xmins)
        # Class indexes start with 1.
        classes = [categories.index(label) + 1] * box_count
        labels = [label.encode('utf8')] * box_count

        feature = {
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename.encode()),
            'image/source_id': dataset_util.bytes_feature(filename.encode()),
            'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
            'image/format': dataset_util.bytes_feature(b'jpg'),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(labels),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }
        return tf.train.Example(features=tf.train.Features(feature=feature))
예제 #28
0
def create_tf_example():
    """Write one tf.train.Example per annotated frame to a TFRecord file.

    Loads the frame list from the hard-coded JSON annotation file, reads each
    frame's image bytes from the hard-coded image directory and writes the
    frame, together with its 'Head' bounding boxes (class id 1), to the
    hard-coded output record. Box coordinates are divided by the frame's
    width/height and capped at 1.0. Annotations with any other label are
    ignored.

    The annotation file is loaded before the output file is opened, and the
    writer is closed even when a frame fails, so a failed run does not leave
    an open handle behind.
    """
    with open("/Data2TB/chl_data/rgb/train/augmented/train_pos_neg.json") as f:
        frames = json.load(f)['frames']

    writer = tf.python_io.TFRecordWriter(
        "/Data2TB/chl_data/rgb/train/augmented/train.record")  # output file
    try:
        for i, frame in enumerate(frames):
            height = frame["height"]  # Image height
            width = frame["width"]  # Image width
            filename_only = frame['file']
            print(str(i) + ": " + filename_only)
            filename = ("/Data2TB/chl_data/rgb/train/augmented/pos_neg_png/" +
                        filename_only)
            # The file's bytes are stored as-is, without re-encoding.
            with tf.gfile.GFile(filename, 'rb') as fid:
                encoded_jpg = fid.read()

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes = []  # Integer class id of each box (1 per box)

            for annotation in frame['annotations']:
                if annotation['label'] != 'Head':
                    continue
                # Normalize to the frame size and cap at 1.0 so boxes that
                # run past the right/bottom edge stay inside the image.
                xmins.append(min(annotation['x'] / width, 1.0))
                xmaxs.append(
                    min((annotation['x'] + annotation['width']) / width, 1.0))
                ymins.append(min(annotation['y'] / height, 1.0))
                ymaxs.append(
                    min((annotation['y'] + annotation['height']) / height,
                        1.0))
                classes.append(1)  # class id 1 == 'head'

            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/filename':
                    dataset_util.bytes_feature(str.encode(filename)),
                    'image/height':
                    dataset_util.int64_feature(height),
                    'image/width':
                    dataset_util.int64_feature(width),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_jpg),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                    'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
                }))
            writer.write(tf_example.SerializeToString())
    finally:
        writer.close()
예제 #29
0
def json_to_record(j):
    """Build a ``tf.train.Example`` describing one annotated image.

    ``j`` is read as a mapping with these keys:
      * ``"image_size"``  - one-element sequence whose item holds
        ``"height"`` and ``"width"``.
      * ``"categories"``  - only its length is used; it must equal the
        number of annotations.
      * ``"annotations"`` - iterable of annotation entries, each carrying a
        ``"class_id"`` and whatever ``get_box_corners`` needs.
      * ``"file"``        - path of the image whose raw bytes are embedded.

    Box corners are divided by the image width/height before being stored.

    Returns the assembled ``tf.train.Example``.
    Raises ``AssertionError`` when ``"image_size"`` is not a singleton or the
    category and annotation counts differ.
    """
    assert len(j["image_size"]) == 1
    assert len(j["categories"]) == len(j["annotations"])

    dims = j["image_size"][0]
    height, width = dims["height"], dims["width"]

    base_name = os.path.basename(j["file"])

    # The record embeds the raw bytes of the image file
    # (same approach as dataset_tools/create_pet_tf_record.py).
    with tf.gfile.GFile(j["file"], "rb") as image_file:
        image_bytes = image_file.read()

    # Each corner key is paired with the dimension it is divided by.
    corner_scale = (
        ("xmin", width),
        ("xmax", width),
        ("ymin", height),
        ("ymax", height),
    )
    boxes = {key: [] for key, _ in corner_scale}
    names = []
    labels = []

    for annotation in j["annotations"]:
        names.append(class_id_to_name(annotation["class_id"]).encode("utf8"))
        # class_ids are indexed by 1 for tensorflow
        labels.append(annotation["class_id"] + 1)
        corners = get_box_corners(annotation)
        for key, extent in corner_scale:
            boxes[key].append(corners[key] / extent)

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(base_name.encode("utf8")),
        'image/source_id': dataset_util.bytes_feature(base_name.encode("utf8")),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        'image/format': dataset_util.bytes_feature(b'jpeg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(boxes["xmin"]),
        'image/object/bbox/xmax': dataset_util.float_list_feature(boxes["xmax"]),
        'image/object/bbox/ymin': dataset_util.float_list_feature(boxes["ymin"]),
        'image/object/bbox/ymax': dataset_util.float_list_feature(boxes["ymax"]),
        'image/object/class/text': dataset_util.bytes_list_feature(names),
        'image/object/class/label': dataset_util.int64_list_feature(labels),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
예제 #30
0
    def taco_to_tfrecord(self, dataset_dir, round, subset, tf_output, class_ids=None,
                class_map=None, return_taco=False, auto_download=False):
        """Load a subset of the TACO dataset and convert it to a TFRecord file.

        Two files are written, both derived from the ``tf_output`` prefix:
          * ``<tf_output>_classes.names`` - one category name per line,
            ordered by category id (Background included).
          * ``<tf_output>_<subset>.bin`` - one serialized ``tf.train.Example``
            per image that has at least one non-Background category.

        dataset_dir: The root directory of the TACO dataset.
        round: split number. When None the annotations are read from
            ``annotations_<subset>.json``, otherwise from
            ``annotations_<round>_<subset>.json``.
        subset: which subset to load (train, val, test)
        tf_output: path prefix of the generated files (required).
        class_ids: accepted for interface compatibility only; the passed value
            is ignored and every category id of the annotation file is used.
        class_map: Dictionary used to assign original classes to new class system
        return_taco: currently unused by this method.
        auto_download: currently unused (download support is still a TODO).

        Raises:
            ValueError: if ``tf_output`` is empty, or an image is not a JPEG.
            AssertionError: if the annotation file does not exist.
        """

        if not tf_output:
            raise ValueError("please provide a tf_output prefix parameter")

        # TODO: Once we got the server running
        # if auto_download is True:
        #     self.auto_download(dataset_dir, subset, year)
        ann_filepath = os.path.join(dataset_dir, 'annotations_')
        if round is not None:
            ann_filepath += str(round) + "_" + subset + ".json"
        else:
            ann_filepath += subset + ".json"

        assert os.path.isfile(ann_filepath), \
            "annotation file not found: " + ann_filepath

        # Load dataset (context manager so the handle is not leaked)
        with open(ann_filepath, 'r') as ann_file:
            dataset = json.load(ann_file)

        # Replace dataset original classes before calling the coco Constructor
        # Some classes may be assigned background to remove them from the dataset
        self.replace_dataset_classes(dataset, class_map)

        taco_alla_coco = COCO()
        taco_alla_coco.dataset = dataset
        taco_alla_coco.createIndex()

        # Register every class except Background and collect the ids of the
        # images that contain at least one of those classes.
        all_class_ids = sorted(taco_alla_coco.getCatIds())
        image_ids = []
        for cat_id in all_class_ids:
            class_name = taco_alla_coco.loadCats(cat_id)[0]["name"]
            if class_name != 'Background':
                self.add_class("taco", cat_id, class_name)
                image_ids.extend(taco_alla_coco.getImgIds(catIds=cat_id))
        # An image with several classes was collected once per class.
        image_ids = list(set(image_ids))

        print('Number of images used:', len(image_ids))

        ## Write all the classes, ordered by id ##
        # include Background to class labels
        # just nothing gets trained on class_id = 0
        with open(tf_output + "_classes.names", "w") as f:
            for class_id in all_class_ids:
                f.write(taco_alla_coco.cats[class_id]['name'] + "\n")

        writer = tf.io.TFRecordWriter(tf_output + "_" + subset + ".bin")
        # try/finally: the writer must be closed even when an image is
        # rejected (e.g. the non-JPEG ValueError below).
        try:
            for i in image_ids:
                img = taco_alla_coco.imgs[i]
                height = img["height"]
                width = img["width"]

                annotations = taco_alla_coco.imgToAnns[img['id']]

                ## lists of features per image ##
                # bbox coordinates, normalized by the image size
                xminl = []
                yminl = []
                xmaxl = []
                ymaxl = []
                # category (numeric)
                catl = []
                # category name
                labell = []

                # default stuff from pascal voc (always set to defaults here.)
                viewl = []
                truncatedl = []
                difficultl = []

                for ann in annotations:
                    if ann['category_id'] == 0:
                        # category_id 0 is treated as Background (a category
                        # replaced through the class map ends up here); no
                        # bbox is needed for it.
                        continue

                    # category
                    catl.append(ann['category_id'])

                    # label of the category
                    cat = taco_alla_coco.cats[ann['category_id']]
                    labell.append(cat['name'].encode('utf8'))

                    # bbox is stored as (x, y, width, height)
                    xmin, ymin, bbox_width, bbox_height = tuple(ann['bbox'])
                    xminl.append(float(xmin) / width)
                    yminl.append(float(ymin) / height)
                    xmaxl.append(float(xmin + bbox_width) / width)
                    ymaxl.append(float(ymin + bbox_height) / height)

                    # defaults
                    viewl.append("".encode('utf8'))
                    truncatedl.append(0)
                    difficultl.append(0)

                filename = os.path.join(dataset_dir, img['file_name'])
                filename = filename.encode('utf8')
                with tf.io.gfile.GFile(filename, 'rb') as fid:
                    encoded_jpg = fid.read()
                image = PIL.Image.open(io.BytesIO(encoded_jpg))
                if image.format != 'JPEG':
                    raise ValueError('Image format not JPEG')
                key = hashlib.sha256(encoded_jpg).hexdigest()

                example = tf.train.Example(features=tf.train.Features(feature={
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(filename),
                    'image/source_id': dataset_util.bytes_feature(filename),
                    'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin': dataset_util.float_list_feature(xminl),
                    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxl),
                    'image/object/bbox/ymin': dataset_util.float_list_feature(yminl),
                    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxl),
                    'image/object/class/text': dataset_util.bytes_list_feature(labell),
                    'image/object/class/label': dataset_util.int64_list_feature(catl),

                    # we put these in just to look like the pascal voc example
                    # but they're always set to defaults
                    # see : https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_pascal_tf_record.py#L124
                    'image/object/difficult': dataset_util.int64_list_feature(difficultl),
                    'image/object/truncated': dataset_util.int64_list_feature(truncatedl),
                    'image/object/view': dataset_util.bytes_list_feature(viewl),
                    }))
                writer.write(example.SerializeToString())
        finally:
            writer.close()