Example #1
# Assumed setup for these snippets: TensorFlow 1.x (tf.gfile) and the
# dataset_util helpers from the TensorFlow Object Detection API;
# USE_INDEX_IN_FILE and category_id_filter are project-specific names
# assumed to be defined elsewhere.
import tensorflow as tf
from object_detection.utils import dataset_util


def create_tf_example(image,
                      labels,
                      points,
                      img_file,
                      id):  # NOTE: 'id' shadows the Python builtin
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    if USE_INDEX_IN_FILE:
        # Derive the index from the part of the file name after the first '_'.
        file_index = int(filename[filename.find("_") + 1:])
    else:
        file_index = id

    # Read the raw JPEG bytes; the image is stored encoded in the record.
    with tf.gfile.GFile(img_file, 'rb') as fid:
        encoded_jpg = fid.read()

    xs = []
    ys = []
    category_ids = []
    num_annotations_skipped = 0
    print("ann size:", len(labels))
    for label, point in zip(labels, points):
        x, y = point
        category_id = int(label)
        if not category_id_filter(category_id):
            num_annotations_skipped += 1
            continue

        # Normalize point coordinates to [0, 1] relative to the image size.
        xs.append(float(x) / image_width)
        ys.append(float(y) / image_height)
        category_ids.append(category_id)

    feature_dict = {
        'image/height':
            dataset_util.int64_feature(image_height),
        'image/width':
            dataset_util.int64_feature(image_width),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/file_index': dataset_util.int64_feature(file_index),
        'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
        'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/point/x':
            dataset_util.float_list_feature(xs),
        'image/object/point/y':
            dataset_util.float_list_feature(ys),
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))

    # tf.train.Example never returns None, so no further check is needed.
    return example, num_annotations_skipped
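
A minimal sketch of how a function like this is usually driven, assuming hypothetical images, labels_list, points_list and img_files sequences and a made-up output path; tf.python_io.TFRecordWriter is the TF 1.x writer matching the tf.gfile usage above.

# Hypothetical driver loop; 'images', 'labels_list', 'points_list' and
# 'img_files' stand in for whatever the surrounding project provides.
writer = tf.python_io.TFRecordWriter('/tmp/points.record')
skipped = 0
for i, (image, labels, points, img_file) in enumerate(
        zip(images, labels_list, points_list, img_files)):
    example, num_skipped = create_tf_example(image, labels, points, img_file, i)
    writer.write(example.SerializeToString())
    skipped += num_skipped
writer.close()
print('skipped annotations:', skipped)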
Example #2
# Same assumed imports as Example #1; wmli (image I/O helpers),
# RECORD_IMG_SIZE and src_file_index are project-specific names assumed
# to be defined elsewhere.
def create_tf_example(image, annotations):
    global src_file_index
    image_height = image['img_height']
    image_width = image['img_width']
    img_path = image['img_path']

    if RECORD_IMG_SIZE is None:
        # Store the JPEG bytes unchanged.
        with tf.gfile.GFile(img_path, 'rb') as fid:
            encoded_jpg = fid.read()
    else:
        # Resize before encoding so every record has a bounded image size.
        img = wmli.imread(img_path)
        img = wmli.resize_img(img, RECORD_IMG_SIZE, keep_aspect_ratio=True)
        encoded_jpg = wmli.encode_img(img)

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_ids = []

    for l, box in annotations:
        # Boxes arrive as [ymin, xmin, ymax, xmax]; split into coordinate lists.
        xmin.append(box[1])
        xmax.append(box[3])
        ymin.append(box[0])
        ymax.append(box[2])
        is_crowd.append(False)
        category_ids.append(l)

    # Skip images that have no annotations at all.
    if len(xmin) == 0:
        return None

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
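
For completeness, a hedged sketch of the matching read-side parse spec, using the TF 1.x tf.parse_single_example API; the feature keys simply mirror the feature_dict above, and parse_example is a hypothetical helper, not part of the original code.

# Hypothetical parser for records produced by the function above.
def parse_example(serialized):
    features = tf.parse_single_example(
        serialized,
        features={
            'image/encoded': tf.FixedLenFeature((), tf.string),
            'image/height': tf.FixedLenFeature((), tf.int64),
            'image/width': tf.FixedLenFeature((), tf.int64),
            'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
            'image/object/class/label': tf.VarLenFeature(tf.int64),
        })
    image = tf.image.decode_jpeg(features['image/encoded'], channels=3)
    labels = tf.sparse_tensor_to_dense(features['image/object/class/label'])
    return image, labels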
Example #3
# In addition to the Example #1 imports, this snippet uses the standard
# library (os, io, hashlib), numpy as np, PIL.Image and pycocotools'
# mask module; trans_id, reverse_trans_id, category_id_filter and
# src_file_index are project-specific names assumed to be defined elsewhere.
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG.
  """
    global src_file_index
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # SHA-256 of the encoded bytes serves as a stable per-image key.
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    repeat_nr = 1
    for object_annotations in annotations_list:
        # COCO boxes are [x, y, width, height] in absolute pixel coordinates.
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        # Drop boxes that extend past the image bounds.
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        category_id = trans_id(category_id)
        if not category_id_filter(category_id):
            num_annotations_skipped += 1
            continue

        # Convert to normalized [ymin, xmin, ymax, xmax] corner coordinates.
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(category_index[reverse_trans_id(category_id)]
                              ['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            # Decode the COCO polygon/RLE segmentation into a binary mask and
            # re-encode it as PNG bytes.
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                # Polygon annotations decode to one channel per polygon; merge them.
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
        'image/file_index': dataset_util.int64_feature(src_file_index),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    # category_id is the key of ID_TO_TEXT (defined elsewhere in the project).
    # Skip images whose annotations were all filtered out.
    if len(category_ids) == 0:
        return None, None, None, None
    src_file_index += 1
    return key, example, num_annotations_skipped, repeat_nr
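
The coordinate conversion described in the docstring is easy to sanity-check in isolation; a small sketch with made-up numbers (normalize_coco_bbox is a hypothetical helper, not part of the original code):

def normalize_coco_bbox(bbox, image_width, image_height):
    # [x, y, w, h] in absolute pixels -> [ymin, xmin, ymax, xmax] normalized,
    # mirroring the arithmetic in create_tf_example above.
    x, y, w, h = bbox
    return (y / image_height, x / image_width,
            (y + h) / image_height, (x + w) / image_width)

# A box [40, 20, 80, 60] in a 200x100 image:
assert normalize_coco_bbox([40.0, 20.0, 80.0, 60.0], 200.0, 100.0) == (0.2, 0.2, 0.8, 0.6)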
Example #4
# Same assumed imports as Examples #1 and #3 (tf, dataset_util, io, PIL.Image).
def create_tf_example(image, annotations_list, img_file, id):  # NOTE: 'id' shadows the builtin
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    if USE_INDEX_IN_FILE:
        file_index = int(filename[filename.find("_") + 1:])
    else:
        file_index = id

    with tf.gfile.GFile(img_file, 'rb') as fid:
        encoded_jpg = fid.read()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    category_ids = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    print("ann size:", len(annotations_list))
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        if not category_id_filter(category_id):
            num_annotations_skipped += 1
            continue

        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        category_ids.append(category_id)

        binary_mask = object_annotations["segmentation"]
        #cv2.imwrite(wmlu.home_dir("x.jpg"),binary_mask*255)
        pil_image = PIL.Image.fromarray(binary_mask)
        output_io = io.BytesIO()
        pil_image.save(output_io, format='PNG')
        encoded_mask_png.append(output_io.getvalue())

    # For testing one might skip empty images here:
    # if len(xmin) == 0:
    #     return None, None

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/file_index': dataset_util.int64_feature(file_index),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
    }
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))

    # tf.train.Example never returns None, so no further check is needed.
    return example, num_annotations_skipped
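
To check that the PNG mask encoding used above round-trips, a small standalone sketch with PIL and numpy (the mask_array values are made up):

import io
import numpy as np
import PIL.Image

mask_array = np.zeros((4, 4), dtype=np.uint8)
mask_array[1:3, 1:3] = 1  # a small square of foreground pixels

buf = io.BytesIO()
PIL.Image.fromarray(mask_array).save(buf, format='PNG')
decoded = np.array(PIL.Image.open(io.BytesIO(buf.getvalue())))
assert np.array_equal(decoded, mask_array)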