def create_tf_example(f, image_path=None):
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    filename = f.readline().rstrip()
    filepath = os.path.join(image_path, filename)
    image_raw = cv2.imread(filepath)

    encoded_image_data = open(filepath, 'rb').read()
    key = hashlib.sha256(encoded_image_data).hexdigest()

    height, width, channel = image_raw.shape

    face_num = int(f.readline().rstrip())
    valid_face_num = 0

    for i in range(face_num):
        annot = f.readline().rstrip().split()
        # WIDER FACE DATASET CONTAINS SOME ANNOTATIONS WHAT EXCEEDS THE IMAGE BOUNDARY
        if (float(annot[2]) > 25.0):
            if (float(annot[3]) > 30.0):
                xmins.append(max(0.005, (float(annot[0]) / width)))
                ymins.append(max(0.005, (float(annot[1]) / height)))
                xmaxs.append(
                    min(0.995, ((float(annot[0]) + float(annot[2])) / width)))
                ymaxs.append(
                    min(0.995, ((float(annot[1]) + float(annot[3])) / height)))
                classes_text.append('face'.encode('utf8'))
                classes.append(1)
                print(xmins[-1], ymins[-1], xmaxs[-1], ymaxs[-1],
                      classes_text[-1], classes[-1])
                valid_face_num += 1

    print("Face Number is %d" % face_num)
    print("Valid face number is %d" % valid_face_num)

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        # 'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        # 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    tf_example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))

    return valid_face_num, tf_example
Beispiel #2
0
def create_tf_example():

  height = 544 # Image height
  width = 960 # Image width
  filename = 'Image10.jpg' # Filename of the image. Empty if image is not from file
  filename=filename.encode()
  with tf.gfile.GFile("./Image10.jpg", 'rb') as fid:
        encoded_image = fid.read()
  image_format = b'jpg' # b'jpeg' or b'png'

  xmins = [458.0/960.0] # List of normalized left x coordinates in bounding box (1 per box)
  xmaxs = [807.0/960.0] # List of normalized right x coordinates in bounding box
             # (1 per box)
  ymins = [157.0/544.0] # List of normalized top y coordinates in bounding box (1 per box)
  ymaxs = [360.0/544.0] # List of normalized bottom y coordinates in bounding box
             # (1 per box)
  classes_text = b"library" # List of string class name of bounding box (1 per box)
  #classes_text=classes_text.encode("utf8")
  classes = "2" # List of integer class id of bounding box (1 per box)
  classes=classes.encode()
  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_image),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      #'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  return tf_example
def create_tf_record(image_file, height, width, xmins, ymins, xmaxs, ymaxs, classes_id, classes_text):
  '''
  param xmins: List of normalized left x coordinates in bounding box (1 per box)
  param xmaxs: List of normalized right x coordinates in bounding box (1 per box)
  param ymins: List of normalized top y coordinates in bounding box (1 per box)
  param ymaxs: List of normalized bottom y coordinates in bounding box (1 per box)
  param classes_text: List of string class name of bounding box (1 per box)
  param classes: List of integer class id of bounding box (1 per box)
  '''
  image_data = tf.gfile.FastGFile(image_file, 'rb').read()
  encoded_image_data = tf.compat.as_bytes(image_data) # Encoded image bytes
  filename = tf.compat.as_bytes(image_file)
  image_format = 'png'.encode('utf8') # b'jpeg' or b'png'

  tf_single_dataset = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(os.path.basename(filename)),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes_id),
  }))
  return tf_single_dataset
def dict_to_coco_example(img_data):
    """Convert python dictionary formath data of one image to tf.Example proto.
    Args:
        img_data: infomation of one image, inclue bounding box, labels of bounding box,\
            height, width, encoded pixel data.
    Returns:
        example: The converted tf.Example
    """
    bboxes = img_data['bboxes']
    xmin, xmax, ymin, ymax = [], [], [], []
    for bbox in bboxes:
        xmin.append(bbox[0])
        xmax.append(bbox[0] + bbox[2])
        ymin.append(bbox[1])
        ymax.append(bbox[1] + bbox[3])

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(img_data['height']),
        'image/width': dataset_util.int64_feature(img_data['width']),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label': dataset_util.int64_list_feature(img_data['labels']),
        'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf-8')),
    }))
    return example
def create_tf(input_path,filename,bb_list):
    height = size # Image height
    width = size # Image width
    encoded_image_data = tf.gfile.GFile(input_path+filename).read() # Encoded image bytes
    image_format = b'jpeg' # b'jpeg' or b'png'
    xmins = [i[0]/float(size) for i in bb_list] # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [i[1]/float(size) for i in bb_list] # List of normalized right x coordinates in bounding box (1 per box)
    ymins = [i[2]/float(size) for i in bb_list] # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [i[3]/float(size) for i in bb_list] # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = ['ship'] * len(bb_list) # List of string class name of bounding box (1 per box)
    classes = [1] * len(bb_list) # List of integer class id of bounding box (1 per box)
    tf_image = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_image
def create_tf_example(filename, dirpath, boxes):

    #Read in the image
    with tf.gfile.GFile(os.path.join(dirpath, filename), 'rb') as fid:
        encoded_image_data = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_image_data)
    image = Image.open(encoded_jpg_io)

    width, height = image.size
    _, ext = os.path.splitext(filename)
    image_format = ext[1:].encode('UTF-8')  # b'jpeg' or b'png'

    xmins = [
        x / width for x in boxes['xmins']
    ]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [
        x / width for x in boxes['xmaxs']
    ]  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = [
        y / height for y in boxes['ymins']
    ]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [
        y / height for y in boxes['ymaxs']
    ]  # List of normalized bottom y coordinates in bounding box (1 per box)

    #DEFAULT LABELS AND CLASSES FOR IVF
    classes_text = [b'sperm'] * len(
        xmins)  # List of string class name of bounding box (1 per box)
    classes = [1] * len(
        xmins)  # List of integer class id of bounding box (1 per box)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('UTF-8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('UTF-8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Beispiel #7
0
def create_tf_example(group, path, labels_mapping):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        class_idx = labels_mapping.get(row['class'], None)
        if class_idx is None:
            continue

        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)

        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_idx)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(group, img_path):

    with tf.gfile.GFile(os.path.join(img_path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    width, height = image.size
    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['label'].encode('utf8'))
        classes.append(class_text_to_int(row['label']))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Beispiel #9
0
def create_tf_example(image_file):
    print("opening {}".format(image_file))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = os.path.basename(image_file).encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    xml_file = image_file + ".xml"
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for member in root.findall('object'):
        classes_text.append(member[0].text.encode('utf-8'))
        classes.append(class_text_to_int(member[0].text))
        xmins.append(int(member[5][0].text) / width)
        xmaxs.append(int(member[5][1].text) / width)
        ymins.append(int(member[5][2].text) / height)
        ymaxs.append(int(member[5][3].text) / height)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Beispiel #10
0
def create_tf_example(image_data):
    height = image_data.height  # Image height
    width = image_data.width  # Image width

    with tf.gfile.GFile(image_data.path, 'rb') as fid:
        encoded_png = fid.read()

    filename = image_data.id.encode('utf8')

    image_format = b'png'  # b'jpeg' or b'png'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for bbox in image_data.bboxes:
        xmins.append(bbox.min_x / width)
        xmaxs.append(bbox.max_x / width)
        ymins.append(bbox.min_y / height)
        ymaxs.append(bbox.max_y / height)
        classes_text.append(bbox.class_label.encode('utf8'))
        classes.append(bbox.class_id)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_png),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Beispiel #11
0
def create_tf_example(example, height, width):
    
    ## Udacity's real-life data set
    ## height = 1096 # Image height
    ## width = 1368 # Image width
    
    ## Udacity's simulator data set
    ## height = 600
    ## width = 800

    filename = example['filename'] # Filename of the image. Empty if image is not from file
    filename = filename.encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode() 

    xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [] # List of normalized right x coordinates in bounding box
                # (1 per box)
    ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [] # List of normalized bottom y coordinates in bounding box
                # (1 per box)
    classes_text = [] # List of string class name of bounding box (1 per box)
    classes = [] # List of integer class id of bounding box (1 per box)

    for box in example['annotations']:
        #print("adding box")
        xmins.append(float(box['xmin'] / width))
        xmaxs.append(float((box['xmin'] + box['x_width']) / width))
        ymins.append(float(box['ymin'] / height))
        ymaxs.append(float((box['ymin']+ box['y_height']) / height))
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))


    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example
Beispiel #12
0
def __create_tf_example(frame_data, sorted_label_list):
    im = PIL.Image.open(io.BytesIO(frame_data.image))
    arr = io.BytesIO()
    if frame_data.format == 'jpg':
        format = 'JPEG'
    else:
        format = frame_data.format.upper()
    im.save(arr, format=format)
    height = im.height
    width = im.width
    encoded_image_data = arr.getvalue()
    rects, labels = bbox_writer.convert_text_to_rects_and_labels(
        frame_data.bboxes_text)
    # List of normalized coordinates, 1 per box, capped to [0, 1]
    xmins = [max(min(rect[0] / width, 1), 0) for rect in rects]  # left x
    xmaxs = [max(min(rect[2] / width, 1), 0) for rect in rects]  # right x
    ymins = [max(min(rect[1] / height, 1), 0) for rect in rects]  # top y
    ymaxs = [max(min(rect[3] / height, 1), 0) for rect in rects]  # bottom y

    classes_txt = [label.encode('utf-8') for label in labels]  # String names
    label_to_id_dict = {label: i for i, label in enumerate(sorted_label_list)}
    class_ids = [label_to_id_dict[label] for label in labels]

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(frame_data.filename.encode('utf-8')),
            'image/source_id':
            dataset_util.bytes_feature(frame_data.filename.encode('utf-8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(frame_data.format.encode('utf-8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_txt),
            'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
        }))
    label_counter_for_frame = collections.Counter(labels)
    is_negative = len(rects) == 0
    return tf_example, label_counter_for_frame, is_negative
def group_to_tf_record(point, image_directory):
    format_png = b'png'
    format_jpg = b'jpeg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    class_nums = []
    class_ids = []
    # changed point[0] to point as is just one point
    image_id = point['id']

    if image_id.startswith('frame'):
        filename = os.path.join(image_directory, image_id + '.png')
        format = format_png
    else:
        filename = os.path.join(image_directory, image_id + '.jpg') #.decode()
        format = format_jpg

    try:
        image = Image.open(filename)
        width, height = image.size
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_image = bytes(fid.read())
    except:
        return None
    key = hashlib.sha256(encoded_image).hexdigest()
    for anno in point['annotations']:
        xmins.append(float(anno['x0']))
        xmaxs.append(float(anno['x1']))
        ymins.append(float(anno['y0']))
        ymaxs.append(float(anno['y1']))
        class_nums.append(anno['class_num'])
        class_ids.append(bytes(anno['label'].encode('utf8')))
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/filename': dataset_util.bytes_feature(bytes(filename.encode('utf8'))),
        'image/source_id': dataset_util.bytes_feature(bytes(image_id.encode('utf8'))),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(class_ids),
        'image/object/class/label': dataset_util.int64_list_feature(class_nums)
    }))
    return tf_example
Beispiel #14
0
def create_tf_example(filename):
    coordinates = filename.rsplit('/', 1)[-1].rsplit('.', 1)[0].split('-')[2]
    leftUp, rightDown = [[int(eel) for eel in el.split('&')]
                         for el in coordinates.split('_')]
    xmin, ymin = leftUp
    xmax, ymax = rightDown

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    height = image.height
    width = image.width
    key = hashlib.sha256(encoded_jpg).hexdigest()

    ymins = [float(ymin) / height]
    xmins = [float(xmin) / width]
    ymaxs = [float(ymax) / height]
    xmaxs = [float(xmax) / width]

    labels_text = ['vehicle plate'.encode('utf8')]
    labels = [2]

    # print("---------image size:",image.size)
    # print("---------xmin:{}, ymin:{}, xmax:{}, ymax:{}".format(xmin,ymin,xmax,ymax))
    # print("---------width:{}, height:{}".format(width,height))

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        # 'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        # 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(labels_text),
        'image/object/class/label': dataset_util.int64_list_feature(labels),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def create_tfdatapoint(file_loc, file, labels):
    img = Image.open(os.path.join(file_loc, 'images', file))
    (width, height) = img.size
    encoded = tf.io.gfile.GFile(os.path.join(file_loc, 'images', file),
                                "rb").read()
    encoded = bytes(encoded)
    image_format = b'png'
    filename = file.split('.')[0]
    data = np.genfromtxt(os.path.join(file_loc, 'labels', filename + '.txt'))
    data = data.reshape(int(data.size / 5), 5)

    classes = [int(x) for x in data[:, 0]]
    classes_text = [labels[x].encode('utf8') for x in classes]
    xmins = data[:, 1] - (data[:, 3] / 2.0)
    xmaxs = data[:, 1] + (data[:, 3] / 2.0)
    ymins = data[:, 2] - (data[:, 4] / 2.0)
    ymaxs = data[:, 2] + (data[:, 4] / 2.0)

    tf_label_and_data = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(str.encode(filename)),
            'image/source_id':
            dataset_util.bytes_feature(str.encode(filename)),
            'image/encoded':
            dataset_util.bytes_feature(encoded),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_label_and_data
def create_tf_example(group, path):
    # Opening and readinf the files
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    # Encode the image in jpeg format to array values
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    # Setting up the image size
    width, height = image.size

    #Creating the boundary box coordinate instances such as xmin,ymin,xmax,ymax
    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'])
        xmaxs.append(row['xmax'])
        ymins.append(row['ymin'])
        ymaxs.append(row['ymax'])
        classes.append(row['class'].encode('utf8'))

    # This is already exisiting code to convert csv to tfrecord
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/label': dataset_util.bytes_list_feature(
                classes),
        }))
    return tf_example
def create_tf_example(group, path):
    base = os.path.splitext(group.filename)[0]
    with tf.gfile.GFile(os.path.join(path, base + ".jpg"), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(int_to_class_text(row['classID']).encode('utf8'))
        classes.append(row['classID']+1)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
Beispiel #18
0
def create_tf_example(height, width, filename, encoded_image_data,
                      image_format, xmins, xmaxs, ymins, ymaxs, classes_text,
                      classes):
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),  # Image height
            'image/width':
            dataset_util.int64_feature(width),  # Image width
            'image/filename':
            dataset_util.bytes_feature(filename),  # Filename of the image
            'image/source_id':
            dataset_util.bytes_feature(filename),  # Filename of the image
            'image/encoded':
            dataset_util.bytes_feature(
                encoded_image_data),  # Encoded image bytes
            'image/format':
            dataset_util.bytes_feature(image_format),  # b'jpeg' or b'png'
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(
                xmins),  # normalized left x coordinate in bounding box
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(
                xmaxs),  # normalized right x coordinate in bounding box
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(
                ymins),  # normalized top y coordinate in bounding box
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(
                ymaxs),  # normalized bottom y coordinate in bounding box
            'image/object/class/text':
            dataset_util.bytes_list_feature(
                classes_text),  # string class name of bounding box
            'image/object/class/label':
            dataset_util.int64_list_feature(
                classes),  # integer class id of bounding box
        }))
    return tf_example
Beispiel #19
0
def create_tf_example(csv, img_dir):
    img_fname = csv[0]
    x1, y1, x2, y2 = list(map(int, csv[1:-1]))
    cls_idx = int(csv[-1])
    cls_text = config.CLASS_NAMES[cls_idx].encode('utf8')
    with tf.gfile.GFile(os.path.join(img_dir, img_fname), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    xmin = [x1 / width]
    xmax = [x2 / width]
    ymin = [y1 / height]
    ymax = [y2 / height]
    cls_text = [cls_text]
    cls_idx = [cls_idx]

    filename = img_fname.encode('utf8')
    image_format = b'jpg'

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(cls_text),
        'image/object/class/label': dataset_util.int64_list_feature(cls_idx),
    }))

    return tf_example
Beispiel #20
0
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id', u'not_exhaustive_category_ids',
      u'neg_category_ids']
    image_dir: directory containing the image files.
    bbox_annotations:
      list of dicts with keys:
      [u'segmentation', u'area', u'image_id', u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official LVIS dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      which is [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    category_index: a dict containing LVIS category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_mask: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  Returns:
    success: whether the conversion is successful
    filename: image filename
    example: The converted tf.Example

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['coco_url']
    filename = osp.join(*filename.split('/')[-2:])

    image_id = image['id']
    image_not_exhaustive_category_ids = image['not_exhaustive_category_ids']
    image_neg_category_ids = image['neg_category_ids']

    full_path = os.path.join(image_dir, filename)
    if not tf.gfile.Exists(full_path):
        tf.logging.warn(f'image {full_path} not exists! skip')
        return False, None, None

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    key = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        'image/height':
        dataset_util.int64_feature(image_height),
        'image/width':
        dataset_util.int64_feature(image_width),
        'image/filename':
        dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/not_exhaustive_category_ids':
        dataset_util.int64_list_feature(image_not_exhaustive_category_ids),
        'image/image_neg_category_ids':
        dataset_util.int64_list_feature(image_neg_category_ids),
    }

    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        encoded_mask_png = []
        for object_annotations in bbox_annotations:
            (x, y, width, height) = tuple(object_annotations['bbox'])

            xmin_single = max(float(x) / image_width, 0.0)
            xmax_single = min(float(x + width) / image_width, 1.0)
            ymin_single = max(float(y) / image_height, 0.0)
            ymax_single = min(float(y + height) / image_height, 1.0)
            if xmax_single <= xmin_single or ymax_single <= ymin_single:
                continue
            xmin.append(xmin_single)
            xmax.append(xmax_single)
            ymin.append(ymin_single)
            ymax.append(ymax_single)

            is_crowd.append(0)
            category_id = int(object_annotations['category_id'])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]['name'].encode('utf8'))
            area.append(object_annotations['area'])

            if include_mask:
                run_len_encoding = mask.frPyObjects(
                    object_annotations['segmentation'], image_height,
                    image_width)
                binary_mask = mask.decode(run_len_encoding)
                binary_mask = np.amax(binary_mask, axis=2)
                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format='PNG')
                encoded_mask_png.append(output_io.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
            'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
            'image/object/area':
            dataset_util.float_list_feature(area),
        })
        if include_mask:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return True, filename, example
Beispiel #21
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list:
        list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
        u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner.  This function converts
        to the format expected by the Tensorflow Object Detection API (which is
        which is [ymin, xmin, ymax, xmax] with coordinates normalized relative
        to image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed
        by the 'id' field of each category.  See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
    Returns:
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
def create_tf_example():
    count = 0
    counter = 0
    writer = tf.python_io.TFRecordWriter(
        "/Data2TB/chl_data/rgb/train/augmented/train.record")  #output file

    #with open(filename) as f:
    #  content = f.readlines()
    #content = [x.strip() for x in content]
    #new_img = PIL.Image.new("L", (480, 640))
    #new_img.putdata(content)

    #with tf.gfile.GFile(filename, 'rb') as fid:
    #  encoded_jpg = fid.read()
    with open("/Data2TB/chl_data/rgb/train/augmented/train_pos_neg.json") as f:
        jsondata = json.load(f)
    for i in range(0, len(jsondata['frames'])):  #looping through JSON objects

        height = jsondata['frames'][i]["height"]  # Image height
        width = jsondata['frames'][i]["width"]  # Image width
        #filename = "/Data2TB/correctly_registered/augmented/combined/" + example # Filename of the image. Empty if image is not from file
        #encoded_image_data = None # Encoded image bytes
        filename_only = jsondata['frames'][i]['file']
        print(str(i) + ": " + filename_only)
        filename = "/Data2TB/chl_data/rgb/train/augmented/pos_neg_png/" + filename_only
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_jpg = fid.read()
        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []

        classes_text = [
        ]  # List of string class name of bounding box (1 per box)
        classes = []  # List of integer class id of bounding box (1 per box)

        for j in range(0, len(jsondata['frames'][i]['annotations'])):
            if (jsondata['frames'][i]['annotations'][j]['label'] == 'Head'):
                xmin = (jsondata['frames'][i]['annotations'][j]['x']) / width
                xmax = (
                    jsondata['frames'][i]['annotations'][j]['x'] +
                    jsondata['frames'][i]['annotations'][j]['width']) / width
                ymin = (jsondata['frames'][i]['annotations'][j]['y']) / height
                ymax = (
                    jsondata['frames'][i]['annotations'][j]['y'] +
                    jsondata['frames'][i]['annotations'][j]['height']) / height
                if xmin > 1:
                    xmin = 1.0
                if xmax > 1:
                    xmax = 1.0
                if ymin > 1:
                    ymin = 1.0
                if ymax > 1:
                    ymax = 1.0
                if (xmin > 1 or xmax > 1 or ymin > 1 or ymax > 1):
                    print("UNNORMALIZED STUFF")
                xmins.append(xmin)
                xmaxs.append(xmax)
                ymins.append(ymin)
                ymaxs.append(ymax)
                classes_text.append('head')
                classes.append(1)
            #elif(jsondata['frames'][i]['annotations'][j]['label'] == 'Right Shoulder' or jsondata['frames'][i]['annotations'][j]['label'] == 'Left Shoulder'):
            #  xmin = (jsondata['frames'][i]['annotations'][j]['x'])
            #  ymin = (jsondata['frames'][i]['annotations'][j]['y'])
            #  if(xmin + 2 > width):
            #    xmin = width - 2
            #  if(ymin + 2 > height):
            #    ymin = height - 2
            #  xmax = xmin + 2
            #  ymax = ymin + 2
            #  xminf = xmin/width
            #  xmaxf = xmax/width
            #  yminf = ymin/height
            #  ymaxf = ymax/height


#
#  if(xminf > 1 or xmaxf > 1 or yminf >1 or ymaxf > 1):
#    print("UNNORMALIZED STUFF")
#  xmins.append(xminf)
#  xmaxs.append(xmaxf)
#  ymins.append(yminf)
#  ymaxs.append(ymaxf)
#  classes_text.append('shoulder')
#  classes.append(2)
        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/filename':
                dataset_util.bytes_feature(str.encode(filename)),
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpg),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            }))
        writer.write(tf_example.SerializeToString())
    writer.close()
Beispiel #23
0
    def _create_tf_entry(self, categories, img, label, filename, annotations):
        imageFormat = b'jpg'

        width, height = img.size

        imgByteArr = io.BytesIO()
        img.save(imgByteArr, format='JPEG')
        encodedImageData = imgByteArr.getvalue()

        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []

        for annotation in annotations:
            rect = None
            if type(
                    annotation.data
            ) is Rectangle:  #currently we only support Rect annotations, TODO: change me
                rect = annotation.data
            elif type(annotation.data) is Polygon:
                rect = annotation.data.rect

            if rect is not None:
                trimmed_rect = rect.trim(
                    Rectangle(0, 0, width, height)
                )  #scale to image dimension in case annotation exceeds image width/height

                if trimmed_rect.left < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect left dimension invalid! (<0)")
                if trimmed_rect.top < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect top dimension invalid! (<0)")
                if trimmed_rect.width < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect width dimension invalid! (<0)")
                if trimmed_rect.height < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect height dimension invalid! (<0)")

                if (trimmed_rect.left + trimmed_rect.width) > width:
                    raise ImageMonkeyGeneralError(
                        "bounding box width > image width!")
                if (trimmed_rect.top + trimmed_rect.height) > height:
                    raise ImageMonkeyGeneralError(
                        "bounding box height > image height!")

                xmin = trimmed_rect.left / float(width)
                xmax = (trimmed_rect.left + trimmed_rect.width) / float(width)
                ymin = trimmed_rect.top / float(height)
                ymax = (trimmed_rect.top + trimmed_rect.height) / float(height)

                #sanity checks
                if xmin > xmax:
                    raise ImageMonkeyGeneralError("xmin > xmax!")

                if ymin > ymax:
                    raise ImageMonkeyGeneralError("ymin > ymax!")

                if (xmin == 0) and (xmax == 0) and (ymin == 0) and (ymax == 0):
                    continue  #skip bounding boxes that are 0

                xmins.append(xmin)
                xmaxs.append(xmax)
                ymins.append(ymin)
                ymaxs.append(ymax)

        #we might have some images in our dataset, which don't have a annotation, skip those
        if ((len(xmins) == 0) or (len(xmaxs) == 0) or (len(ymins) == 0)
                or (len(ymaxs) == 0)):
            return None

        classes = [(categories.index(label) + 1)] * len(
            xmins)  #class indexes start with 1
        labels = [label.encode('utf8')] * len(xmins)

        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(filename.encode()),
                'image/source_id':
                dataset_util.bytes_feature(filename.encode()),
                'image/encoded':
                dataset_util.bytes_feature(encodedImageData),
                'image/format':
                dataset_util.bytes_feature(imageFormat),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(labels),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            }))
        return tf_example
Beispiel #24
0
def create_tf_example(filename, label_file):
    img = cv2.imread(filename)
    height, width, channels = img.shape

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image_data = fid.read()

    image_format = b'jpg'

    xmins = [
    ]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box
    # (1 per box)
    ymins = [
    ]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box
    # (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    with open(label_file, 'r') as f:
        csvreader = csv.reader(f, delimiter=' ')
        head = True

        for row in csvreader:
            if head:
                head = False
                continue

            name = row[-1]
            classes_text.append(name)
            classes.append(get_index(name))

            xmins.append(float(row[0]) / width)
            xmaxs.append(float(row[2]) / width)
            ymins.append(float(row[1]) / height)
            ymaxs.append(float(row[3]) / height)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
Beispiel #25
0
def dict_to_tf_example(data,
                       image_path,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='images'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      image_path: Full path to image file
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      image_subdirectory: String specifying subdirectory within the
        PASCAL dataset directory holding the actual image data.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    # img_path = os.path.join(
    #     data['folder'], image_subdirectory, data['filename'])
    # full_path = os.path.join(dataset_directory, img_path)
    full_path = image_path
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    filename = full_path.split('/')[-1]

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'object' in data:
        for obj in data['object']:
            difficult = False  # bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            # truncated.append(int(obj['truncated']))
            truncated.append(0)
            # poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Beispiel #26
0
def background_tf_example(image_path, ):
    """
    Args:
      image_path: Full path to image file

    Returns:
      example: The converted tf.Example.
    """

    full_path = image_path
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    filename = full_path.split('/')[-1]
    width = image.width
    height = image.height

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
Beispiel #27
0
def json_to_record(j):
    assert (len(j["image_size"]) == 1)
    assert (len(j["categories"]) == len(j["annotations"]))

    image_size = j["image_size"][0]
    height = image_size["height"]
    width = image_size["width"]

    filename = os.path.basename(j["file"])

    # actual image bytes? refer to dataset_tools/create_pet_tf_record.py
    with tf.gfile.GFile(j["file"], "rb") as fid:
        encoded_jpg = fid.read()
        pass
    encoded_image_data = encoded_jpg
    image_format = b'jpeg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []

    classes_text = []
    classes = []

    for annot in j["annotations"]:
        c_name = class_id_to_name(annot["class_id"])
        classes_text.append(c_name.encode("utf8"))
        # class_ids are indexed by 1 for tensorflow
        classes.append(annot["class_id"] + 1)
        corners = get_box_corners(annot)
        xmins.append(corners["xmin"] / width)
        xmaxs.append(corners["xmax"] / width)
        ymins.append(corners["ymin"] / height)
        ymaxs.append(corners["ymax"] / height)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode("utf8")),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode("utf8")),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    # print(tf_example)
    return tf_example
    pass
Beispiel #28
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):

    # image_height = image[2]
    # image_width = image[1]
    # filename = image[0]# TODO(user): Populate the following variables from your example.
    # print(image)
    height = image['height']  # Image height
    width = image['width']  # Image width
    filename = image[
        'filename']  # Filename of the image. Empty if image is not from file

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_image_io = io.BytesIO(encoded_jpg)  # Encoded image bytes
    image = PIL.Image.open(encoded_image_io)
    only_file_name, image_format = os.path.splitext(filename)

    xmins = [
    ]  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box
    # (1 per box)
    ymins = [
    ]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box
    # (1 per bo)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)
    # print(len(annotations_list))
    for annotation in annotations_list:
        # print(annotation)
        xmins.append(annotation['xmin'] / width)
        xmaxs.append(annotation['xmax'] / width)
        ymins.append(annotation['ymin'] / height)
        ymaxs.append(annotation['ymax'] / height)
        classes_text.append(annotation['label_text'].encode('utf8'))
        classes.append(annotation['label'])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Beispiel #29
0
def dict_to_tf_example(writer, data):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    full_path = os.path.join(data['folder'], data['filename']).replace(
        '/home/data/usrs/jiangyz/images', '/data1/chenyf')
    #print('full_path%s'%full_path)

    OriImg = PIL.Image.open(full_path)
    if (OriImg.mode != 'RGB'):
        OriImg = OriImg.convert("RGB")
        print(full_path + ' is not a rgb image, converting...')
    OriImgArray = np.asarray(OriImg)

    w = int(OriImgArray.shape[1])
    h = int(OriImgArray.shape[0])

    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if difficult:
            print('there is a difficult instance.....')
            raw_input()
            continue

        left = int(obj['bndbox']['xmin'])
        top = int(obj['bndbox']['ymin'])
        right = int(obj['bndbox']['xmax'])
        down = int(obj['bndbox']['ymax'])
        # if (right-left)*(down-top)<w*h/4:
        #   continue

        difficult_obj = int(difficult)
        imgSinglePerson = OriImg
        imgSingle = np.asarray(imgSinglePerson)

        img_raw = imgSingle.tostring()
        classes_text = obj['name'].encode('utf8')
        classes = 0

        kp_cor_v = [int(x) for x in obj['keypoints']['visible']]

        truncated = int(obj['truncated'])
        poses = obj['pose'].encode('utf8')

        kpNum = FLAGS.kpNum

        if (sum(1 for x in kp_cor_v if x) < (kpNum + 1) / 2):
            continue

        kp_cor = []
        for tmp_id in range(kpNum):
            if kp_cor_v[tmp_id] != 0:
                #convert to imgSinglePerson
                xc = int(obj['keypoints']['x'][tmp_id])
                yc = int(obj['keypoints']['y'][tmp_id])
                kp_cor.append((xc, yc))
        global _all_num
        _all_num += 1
        #tf.summary.image('image',tf.convert_to_tensor(np.array([OriImgArray])),1)
        #show images

        #color = [(255,0,0),(0,255,0),(0,0,255),(255,255,0),(0,255,255),(255,0,255),(0,0,0)]
        heatmap = np.zeros([h, w, kpNum], np.float32)
        #print('shape of heatmap=',np.shape(heatmap))
        sigma = 10
        for idx, cor_xy in enumerate(kp_cor):
            cor_x, cor_y = cor_xy
            r = 36  # int(8/96.0*224)
            for ii in range(-r, r + 1, 1):
                for jj in range(-r, r + 1, 1):
                    xxxx = cor_x + ii
                    yyyy = cor_y + jj
                    if (xxxx < 0) or (yyyy < 0) or (xxxx > w - 1) or (yyyy >
                                                                      h - 1):
                        continue
                    heatmap[yyyy, xxxx, idx] += np.exp(-(ii * ii + jj * jj) /
                                                       (2 * sigma * sigma))
                    #heatmap[yyyy,xxxx]=255
        heatmap[heatmap > 1] = 1.0

        #print('length of heatmap=',len(hm_raw))
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(h),
                'image/width':
                dataset_util.int64_feature(w),
                'image/filename':
                dataset_util.bytes_feature(data['filename'].encode('utf8')),
                'image/source_id':
                dataset_util.bytes_feature(data['filename'].encode('utf8')),
                'image/encoded':
                dataset_util.bytes_feature(img_raw),
                'image/heatmap':
                dataset_util.float_list_feature(heatmap.flatten()),
                'image/keypointnumber':
                dataset_util.int64_feature(kpNum),
                'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/class/text':
                dataset_util.bytes_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_feature(classes),
                'image/object/difficult':
                dataset_util.int64_feature(difficult_obj),
                'image/object/truncated':
                dataset_util.int64_feature(truncated),
                'image/object/view':
                dataset_util.bytes_feature(poses),
            }))
        writer.write(example.SerializeToString())
Beispiel #30
0
def read_xml_make_tfrecord():
    num_data = 8
    for i in range(num_data):
        globals()['train_writer_{:05d}-of-{:05d}'.format(
            int(i), int(num_data))] = tensorflow.io.TFRecordWriter(
                'tfrecord/train/train.tfrecord-{:05d}-of-{:05d}'.format(
                    int(i), int(num_data)))

    for i in range(int(num_data / 8)):
        globals()['test_writer_{:05d}-of-{:05d}'.format(
            int(i), int(num_data / 8))] = tensorflow.io.TFRecordWriter(
                'tfrecord/test/test.tfrecord-{:05d}-of-{:05d}'.format(
                    int(i), int(num_data / 8)))
        globals()['valid_writer_{:05d}-of-{:05d}'.format(
            int(i), int(num_data / 8))] = tensorflow.io.TFRecordWriter(
                'tfrecord/valid/valid.tfrecord-{:05d}-of-{:05d}'.format(
                    int(i), int(num_data / 8)))

    length = len(os.listdir(folder))

    for number, img_name in enumerate(os.listdir(folder)):
        if img_name[-4:] != '.jpg': continue
        filename = img_name[:-4]
        img = cv2.imread(folder + filename + ".jpg")
        height, width = img.shape[:2]

        mask = cv2.imread('mask/' + filename + '.jpg', 0)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((5, 5),
                                                               np.uint8))
        cv2.imshow("asdas", mask)
        cv2.waitKey()
        _, contours, _ = cv2.findContours(mask, cv2.RETR_TREE,
                                          cv2.CHAIN_APPROX_SIMPLE)
        print(contours)
        contours = sorted(contours, key=lambda x: len(x), reverse=True)

        x = [temp[0][0] for temp in contours[0]]
        y = [temp[0][1] for temp in contours[0]]
        xmin = min(x)
        xmax = max(x)
        ymin = min(y)
        ymax = max(y)
        # cv2.circle(img,(xmin,ymin),5,(255,0,0),5)
        # cv2.circle(img, (xmax, ymax), 5, (255, 0, 0), 5)
        # cv2.imshow("asd",img)
        # cv2.waitKey()
        object_name = 'passport'
        pixel_val = 255
        with tensorflow.io.gfile.GFile(folder + filename + ".jpg",
                                       'rb') as fid:
            encoded_image_data = fid.read()
        key = hashlib.sha256(encoded_image_data).hexdigest()

        with tensorflow.io.gfile.GFile('mask/' + filename + ".jpg",
                                       'rb') as fid:
            encoded_mask_data = fid.read()

        encoded_mask = io.BytesIO(encoded_mask_data)
        mask = Image.open(encoded_mask)
        mask_np = np.asarray(mask.convert('L'))
        mask_remapped = (mask_np == pixel_val).astype(np.uint8)
        # print("mask",mask_remapped.shape)
        # cv2.imshow("asd",mask_remapped*255)
        # cv2.waitKey()
        mask_img = Image.fromarray(mask_remapped)
        output = io.BytesIO()
        mask_img.save(output, format='PNG')

        xmins = [xmin / width]
        xmaxs = [xmax / width]
        ymins = [ymin / height]
        ymaxs = [ymax / height]
        classes_text = [object_name.encode('utf8')]
        classes = [1]
        masks = [output.getvalue()]

        print(img_name)
        print(xmins)
        print(xmaxs)
        print(ymins)
        print(ymaxs)
        print(classes_text)
        print(classes)
        print(masks)
        example = tensorflow.train.Example(features=tensorflow.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(img_name.encode('utf8')),
                'image/source_id':
                dataset_util.bytes_feature(img_name.encode('utf8')),
                'image/key/sha256':
                dataset_util.bytes_feature(key.encode('utf8')),
                'image/encoded':
                dataset_util.bytes_feature(encoded_image_data),
                'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
                'image/object/mask':
                dataset_util.bytes_list_feature(masks),
            }))
        if number < length * 0.8:
            globals()['train_writer_{:05d}-of-{:05d}'.format(
                int(number / (length * 0.8) * num_data),
                int(num_data))].write(example.SerializeToString())

        elif number < length * 0.9:
            globals()['valid_writer_{:05d}-of-{:05d}'.format(
                int((number - length * 0.8) / (length * 0.1) * num_data / 8),
                int(num_data / 8))].write(example.SerializeToString())
        elif number < length:

            globals()['test_writer_{:05d}-of-{:05d}'.format(
                int((number - length * 0.9) / (length * 0.1) * num_data / 8),
                int(num_data / 8))].write(example.SerializeToString())