コード例 #1
0
def create_tf_example(group, path):
    """Creates a tf.Example proto for one image and its annotated boxes.

    Args:
     group: Record exposing `filename` (image file name, joined onto `path`)
       and `object` (a table walked with `iterrows()`; each row is read for
       `xmin`, `xmax`, `ymin`, `ymax` and `class_num`).
     path: Directory holding the image file.

    Returns:
     example: The created tf.Example.
    """
    image_location = op.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_location, 'rb') as image_file:
        encoded_jpg = image_file.read()

    # The image is opened only to obtain the pixel size by which the box
    # coordinates are divided below.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size
    filename = group.filename.encode('utf8')

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, annotation in group.object.iterrows():
        xmins.append(annotation['xmin'] / width)
        xmaxs.append(annotation['xmax'] / width)
        ymins.append(annotation['ymin'] / height)
        ymaxs.append(annotation['ymax'] / height)
        # `tags` (defined elsewhere in the file) is indexed by the numeric
        # class id to obtain the class name.
        classes_text.append(tags[annotation['class_num']].encode('utf8'))
        classes.append(annotation['class_num'])

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
コード例 #2
0
    def create_tf_example_other(self, example, filename):
        """Build a tf.train.Example for one image using the OTHER label set.

        Args:
            example: Mapping with an 'annotations' list. Each annotation is
                read for 'xmin', 'ymin', 'x_width', 'y_height' (divided by
                the image width/height) and 'class' (looked up in
                DICT_LABEL_OTHER to obtain the integer label).
            filename: Path of the image file, as text.

        Returns:
            The populated tf.train.Example. Only the integer class labels
            are written; the 'image/object/class/text' feature is not
            emitted by this converter.
        """
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_image = fid.read()

        # Only the dimensions are needed, so the pixel data is never decoded
        # and the file handle is released as soon as the size has been read.
        with Image.open(filename) as image:
            width, height = image.size

        encoded_filename = filename.encode()
        image_format = 'jpg'.encode()

        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []
        classes = []
        for box in example['annotations']:
            box_x = box['xmin']
            box_y = box['ymin']
            # Convert to float *before* dividing so integer pixel values are
            # not truncated by integer division.
            xmins.append(float(box_x) / width)
            xmaxs.append(float(box_x + box['x_width']) / width)
            ymins.append(float(box_y) / height)
            ymaxs.append(float(box_y + box['y_height']) / height)
            print("[", box['class'].encode('utf-8'), "]")
            classes.append(int(DICT_LABEL_OTHER[box['class']]))

        feature = {
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(encoded_filename),
            'image/source_id': dataset_util.bytes_feature(encoded_filename),
            'image/encoded': dataset_util.bytes_feature(encoded_image),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }
        return tf.train.Example(features=tf.train.Features(feature=feature))
コード例 #3
0
def create_tf_example(group, path, dictionary):
    """Build a tf.train.Example for one image and its annotated boxes.

    Args:
        group: Record exposing `filename` (image name without the '.jpg'
            suffix, which is appended here) and `object` (a table walked
            with `iterrows()`; each row is read for 'xmin', 'xmax', 'ymin',
            'ymax' and 'class').
        path: Directory holding the image file.
        dictionary: Passed through to `class_text_to_int` together with the
            class name to obtain the integer label.

    Returns:
        The populated tf.train.Example.
    """
    image_location = os.path.join(path, '{}'.format(group.filename)) + '.jpg'
    with tf.io.gfile.GFile(image_location, 'rb') as image_file:
        encoded_jpg = image_file.read()

    # Opened only for the pixel size by which the boxes are divided.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size
    filename = group.filename.encode('utf8')

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, annotation in group.object.iterrows():
        xmins.append(annotation['xmin'] / width)
        xmaxs.append(annotation['xmax'] / width)
        ymins.append(annotation['ymin'] / height)
        ymaxs.append(annotation['ymax'] / height)
        classes_text.append(annotation['class'].encode('utf8'))
        classes.append(class_text_to_int(annotation['class'], dictionary))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
コード例 #4
0
def _image_to_tfexample(image_name, annotation_name):
    """Generate a tf example by image and annotation file.

    Args:
        image_name: Path of the encoded image file; its bytes are stored
            unchanged under 'image/encoded'.
        annotation_name: Path of the XML annotation file. It is read for a
            `size` element (`height`, `width`, `depth`) and for `object`
            elements (`name`, `bndbox`, and optional `difficult` /
            `truncated` flags).

    Returns:
        A tf.train.Example with the image bytes, its dimensions and the
        per-object boxes (divided by the image width/height), labels and
        flags.

    Raises:
        KeyError: If an object name is missing from VOC_LABELS.
    """
    # Use a context manager so the file handle is closed deterministically.
    with tf.gfile.FastGFile(image_name, 'rb') as image_file:
        image_data = image_file.read()
    root = ElementTree.parse(annotation_name).getroot()

    # image shape
    size = root.find('size')
    height = int(size.find('height').text)
    width = int(size.find('width').text)
    channels = int(size.find('depth').text)

    def read_flag(parent, tag):
        """Return the integer value of child `tag`, or 0 when absent/empty."""
        node = parent.find(tag)
        # An Element without children is falsy, so the node must be compared
        # against None explicitly; testing its truth value would discard
        # every flag that is actually present in the annotation.
        text = node.text if node is not None else None
        return int(text) if text and text.strip() else 0

    # image annotations
    xmin = []
    xmax = []
    ymin = []
    ymax = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        label_name = obj.find('name').text
        labels.append(int(VOC_LABELS[label_name][0]))
        labels_text.append(label_name.encode('ascii'))

        difficult.append(read_flag(obj, 'difficult'))
        truncated.append(read_flag(obj, 'truncated'))

        bbox = obj.find('bndbox')
        xmin.append(float(bbox.find('xmin').text) / width)
        xmax.append(float(bbox.find('xmax').text) / width)
        ymin.append(float(bbox.find('ymin').text) / height)
        ymax.append(float(bbox.find('ymax').text) / height)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded': bytes_feature(image_data),
            'image/format': bytes_feature(b'JPEG'),
            'image/height': int64_feature(height),
            'image/width': int64_feature(width),
            'image/channels': int64_feature(channels),
            'image/object/bbox/xmin': float_list_feature(xmin),
            'image/object/bbox/xmax': float_list_feature(xmax),
            'image/object/bbox/ymin': float_list_feature(ymin),
            'image/object/bbox/ymax': float_list_feature(ymax),
            'image/object/bbox/label': int64_list_feature(labels),
            'image/object/bbox/text': bytes_list_feature(labels_text),
            'image/object/bbox/difficult': int64_list_feature(difficult),
            'image/object/bbox/truncated': int64_list_feature(truncated),
        }))
    return example
コード例 #5
0
def dict_to_tf_example(data,
                       dataset_directory,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages',
                       label_map_dict=None):
    """Convert one parsed annotation dict into a tf.train.Example.

    Args:
        data: Annotation mapping read for 'folder', 'filename', 'size'
            ('width', 'height') and, when present, 'object' (a list of
            mappings with 'name', 'difficult', 'truncated', 'pose' and
            'bndbox').
        dataset_directory: Root directory the image path is joined onto.
        ignore_difficult_instances: When true, objects flagged as difficult
            are left out of the example.
        image_subdirectory: Directory, below the annotation's folder, that
            holds the image files.
        label_map_dict: Optional mapping from class name to integer id.
            When given, 'image/object/class/label' gets one id per kept
            object. When omitted that feature stays empty, which is what
            this function produced before the parameter existed.

    Returns:
        The populated tf.train.Example.

    Raises:
        ValueError: If the image file is not a JPEG.
        KeyError: If `label_map_dict` is given and lacks an object's name.
    """
    img_path = os.path.join(data['folder'], image_subdirectory,
                            data['filename'])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # An annotation without any object has no 'object' entry at all; treat
    # it as an image with zero boxes instead of failing.
    objects = data['object'] if 'object' in data else []
    for obj in objects:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        if label_map_dict is not None:
            # Keeps the label list aligned one-to-one with the boxes.
            classes.append(label_map_dict[obj['name']])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    encoded_filename = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_filename),
        'image/source_id': dataset_util.bytes_feature(encoded_filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example
コード例 #6
0
def create_tf_example(frame, label_map_dict):
    """Build a tf.train.Example for one annotated frame.

    Args:
        frame: Mapping read for 'height', 'width', 'frame_id' (used to form
            the '<frame_id>.jpg' file name below FLAGS.image_dir), 'bboxes'
            (indexable boxes whose items 0/2 are divided by the width and
            items 1/3 by the height) and 'names' (class names).
        label_map_dict: Mapping from class name to integer class id.

    Returns:
        The populated tf.train.Example.
    """
    height = frame['height']
    width = frame['width']
    image_name = '{}.jpg'.format(frame['frame_id'])
    img_path = os.path.join(FLAGS.image_dir, image_name)
    filename = image_name.encode()
    with tf.gfile.GFile(img_path, 'rb') as image_file:
        encoded_image_data = image_file.read()

    def scaled(index, extent):
        """Return item `index` of every box divided by `extent`."""
        return [float(bbox[index]) / extent for bbox in frame['bboxes']]

    xmins = scaled(0, width)
    xmaxs = scaled(2, width)
    ymins = scaled(1, height)
    ymaxs = scaled(3, height)
    classes_text = [name.encode() for name in frame['names']]
    classes = [label_map_dict[name] for name in frame['names']]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(b'jpeg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
コード例 #7
0
ファイル: vatic.py プロジェクト: GBJim/tfo_training
def create_tf_example(bboxes, img_info, category_name2id, class_mapper=None):
  """Build a tf.train.Example for one image and its bounding boxes.

  Args:
    bboxes: Iterable of mappings read for 'x1', 'y1', 'width', 'height'
      (divided by the image size) and 'label'.
    img_info: Mapping read for 'height', 'width', 'path' (image file to
      read) and 'format'.
    category_name2id: Mapping from class name to integer class id.
    class_mapper: Optional mapping used to rename labels before the id
      lookup; labels it does not contain are used unchanged.

  Returns:
    The populated tf.train.Example. Boxes whose (mapped) label is
    "__background__" are left out.

  Raises:
    KeyError: If a (mapped) label is missing from `category_name2id`.
  """
  # A shared mutable default ({}) is avoided; None stands for "no renaming".
  if class_mapper is None:
    class_mapper = {}

  height = img_info['height']
  width = img_info['width']
  filename = img_info['path']
  with tf.gfile.GFile(filename, 'rb') as fid:
    encoded_jpg = fid.read()
  image_format = img_info['format']

  xmins = []
  xmaxs = []
  ymins = []
  ymaxs = []
  classes_text = []
  classes = []

  for bbox in bboxes:
    label = bbox['label']
    class_text = class_mapper[label] if label in class_mapper else label
    # Decide whether the box is kept before doing any arithmetic on it.
    if class_text == "__background__":
      continue
    class_id = category_name2id[class_text]

    xmins.append(float(bbox['x1']) / width)
    xmaxs.append(float(bbox['x1'] + bbox['width']) / width)
    ymins.append(float(bbox['y1']) / height)
    ymaxs.append(float(bbox['y1'] + bbox['height']) / height)
    # Bytes features reject text strings on Python 3, so every textual
    # value is converted to bytes before it reaches dataset_util.
    classes_text.append(tf.compat.as_bytes(str(class_text)))
    classes.append(class_id)

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          tf.compat.as_bytes(os.path.basename(filename))),
      'image/source_id': dataset_util.bytes_feature(
          tf.compat.as_bytes(filename)),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature(
          tf.compat.as_bytes(image_format)),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  return tf_example
コード例 #8
0
def create_tf_example(group, path):
    """Build a tf.train.Example for one image, printing progress as it goes.

    Args:
        group: Record exposing `filename` (image file name, joined onto
            `path`) and `object` (a table walked with `iterrows()`; each
            row is read for 'xmin', 'xmax', 'ymin', 'ymax' and 'class').
        path: Directory holding the image file.

    Returns:
        The populated tf.train.Example. Every annotation row and the final
        list of class ids are printed to standard output.
    """
    image_location = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_location, 'rb') as image_file:
        encoded_jpg = image_file.read()

    # Opened only for the pixel size by which the boxes are divided.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size
    filename = group.filename.encode('utf8')

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, row in group.object.iterrows():
        print('row', row)
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))
    print('classes', classes)

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
コード例 #9
0
def create_tf_example(ex_name, ex_xmins, ex_xmaxs, ex_ymins, ex_ymaxs):
    """Build a tf.train.Example holding a single 'Shoes' box for one image.

    Args:
        ex_name: Image file name, joined onto the module-level PATH.
        ex_xmins: Left x coordinate of the box, stored as given.
        ex_xmaxs: Right x coordinate of the box, stored as given.
        ex_ymins: Top y coordinate of the box, stored as given.
        ex_ymaxs: Bottom y coordinate of the box, stored as given.
            NOTE(review): the coordinates are not divided by the image size
            here, so they are presumably already normalised - confirm with
            the caller.

    Returns:
        The populated tf.train.Example with one box of class 'Shoes'
        (class id 91).
    """
    image_path = os.path.join(PATH, ex_name)

    # 'image/encoded' must hold the bytes of the encoded file. The raw
    # decoded pixel buffer returned by Image.tobytes() is not a JPEG stream.
    with open(image_path, 'rb') as image_file:
        encoded_image_data = image_file.read()
    with Image.open(image_path) as image:
        width, height = image.size

    # Bytes features reject text strings on Python 3.
    filename = tf.compat.as_bytes(ex_name)
    image_format = b'jpg'

    xmins = [ex_xmins]
    xmaxs = [ex_xmaxs]
    ymins = [ex_ymins]
    ymaxs = [ex_ymaxs]
    classes_text = [b'Shoes']
    classes = [91]

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
コード例 #10
0
def create_tf_example(group):
    """Build a tf.train.Example for one image fetched from object storage.

    Args:
        group: Record exposing `filename` (object name inside the 'images'
            bucket) and `object` (a table walked with `iterrows()`; each
            row is read for 'xmin', 'xmax', 'ymin', 'ymax' and 'class').

    Returns:
        The populated tf.train.Example. The downloaded bytes are stored
        unchanged under 'image/encoded'.

    Raises:
        KeyError: If a class name is missing from `row_labels`.
    """
    encoded_img = object_storage.get_object(namespace, 'images',
                                            group.filename).data.content

    # Only the dimensions are needed; reading `size` does not require
    # decoding (or colour-converting) the pixel data.
    image = Image.open(io.BytesIO(encoded_img))
    width, height = image.size

    filename = group.filename.encode('utf8')
    # The stored bytes are never re-encoded, so the format tag follows the
    # file actually downloaded instead of always claiming JPEG.
    image_format = b'png' if group.filename.endswith('.png') else b'jpg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for _, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(row_labels[row['class']])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_img),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
コード例 #11
0
def dict_to_tf_example(depth_map_path, normal_map_path, foreground_map_path,
                       point_path, sdf_path):
    """
    Pack depth/normal/foreground maps, points and SDF values into a tf.Example.
    ----
    Args:
    ----
      depth_map_path: Path of the depth map image.
      normal_map_path: Path of the normal map image.
      foreground_map_path: Path of the foreground map image.
      point_path: Path of the .npy file holding the points.
      sdf_path: Path of the .npy file holding the SDF values.
    ----
    Returns:
    ----
      example: The converted tf.Example. Each map is stored as raw uint8
        bytes together with its height and width; points and sdf are stored
        as flat float lists truncated to the first NUM_POINTS rows.
    """
    # Every map is loaded at (OUTPUT_HEIGHT, OUTPUT_WIDTH) and cast to uint8
    # so that its raw buffer can be stored as bytes.
    depth_map = load_image(depth_map_path,
                           (OUTPUT_HEIGHT, OUTPUT_WIDTH)).astype(np.uint8)
    normal_map = load_image(normal_map_path,
                            (OUTPUT_HEIGHT, OUTPUT_WIDTH)).astype(np.uint8)
    foreground_map = load_image(foreground_map_path,
                                (OUTPUT_HEIGHT, OUTPUT_WIDTH)).astype(np.uint8)

    # flatten here is necessary, otherwise the multi-dimensional ndarray cannot be stored in tf records
    points = np.squeeze(load_npy(point_path))[0:NUM_POINTS, :].flatten()
    sdf = load_npy(sdf_path)[0:NUM_POINTS, :].flatten()

    # Create the TFRecord example. ndarray.tobytes() replaces the deprecated
    # ndarray.tostring() alias and returns the same raw buffer.
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'depth_map/height':
            dataset_util.int64_feature(depth_map.shape[0]),
            'depth_map/width':
            dataset_util.int64_feature(depth_map.shape[1]),
            'depth_map/encoded':
            dataset_util.bytes_feature(tf.compat.as_bytes(
                depth_map.tobytes())),
            'normal_map/height':
            dataset_util.int64_feature(normal_map.shape[0]),
            'normal_map/width':
            dataset_util.int64_feature(normal_map.shape[1]),
            'normal_map/encoded':
            dataset_util.bytes_feature(
                tf.compat.as_bytes(normal_map.tobytes())),
            'foreground_map/height':
            dataset_util.int64_feature(foreground_map.shape[0]),
            'foreground_map/width':
            dataset_util.int64_feature(foreground_map.shape[1]),
            'foreground_map/encoded':
            dataset_util.bytes_feature(
                tf.compat.as_bytes(foreground_map.tobytes())),
            'points':
            dataset_util.float_list_feature(points),
            'sdf':
            dataset_util.float_list_feature(sdf)
        }))
    return example
コード例 #12
0
def create_tf_example(image_path, example, image_id):
    """
    Create a tf_example using @example.
    @image_path is the directory that contains "<image_id>.jpg".
    @example is of form : ["ImageID", "XMin", "XMax", "YMin", "YMax"] which are the columns of "fish.csv".
    @example contains all bounding boxes for the image with @image_id
    Every box is labelled b'fish' with class id 1. The box coordinates are
    stored exactly as given (they are not divided by the image size).
    """
    image_name = image_id + '.jpg'
    image_file_path = os.path.join(image_path, image_name)

    with tf.gfile.GFile(image_file_path, 'rb') as fid:
        encoded_image_data = fid.read()

    # Only the dimensions are needed; the context manager closes the image
    # file deterministically instead of relying on `del`.
    with Image.open(image_file_path) as image:
        width, height = image.size

    filename = image_name.encode()
    image_format = b'jpg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    for bbox in np.array(example[['XMin', 'XMax', 'YMin', 'YMax']]):
        xmins.append(bbox[0])
        xmaxs.append(bbox[1])
        ymins.append(bbox[2])
        ymaxs.append(bbox[3])

    box_count = len(example)
    classes_text = [b'fish'] * box_count
    classes = [1] * box_count

    tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
コード例 #13
0
def create_tf_example(entry):
    """Build a tf.train.Example holding the single box described by `entry`.

    Args:
        entry: Mapping read for 'height', 'width', 'file_name' (appended to
            the module-level BASE_DIR to locate the image), 'bbox' (items
            0..3 are used as x, y, box width, box height), 'name' (class
            name) and 'category_id' (integer class id).

    Returns:
        The populated tf.train.Example with one box, its corners divided by
        the image width/height.
    """
    height = entry['height']
    width = entry['width']
    file_name = entry['file_name']

    # Open with the original text name: round-tripping it through
    # encode()/decode('ascii') fails for non-ASCII file names. The context
    # manager also closes the handle deterministically.
    with open(BASE_DIR + file_name, 'rb') as image_file:
        encoded_image_data = image_file.read()

    filename = file_name.encode()
    image_format = b'jpeg'

    bbox = entry['bbox']
    left, top = bbox[0], bbox[1]
    right, bottom = left + bbox[2], top + bbox[3]
    # Convert to float *before* dividing so integer pixel values are not
    # truncated by integer division.
    xmins = [float(left) / width]
    xmaxs = [float(right) / width]
    ymins = [float(top) / height]
    ymaxs = [float(bottom) / height]
    classes_text = [entry['name'].encode()]
    classes = [entry['category_id']]

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
コード例 #14
0
def create_tf_example(name, image_dir, annot_dir):
    """Build a tf.train.Example with one 'hand' box per annotated outline.

    Args:
        name: Base name shared by the image ('<name>.jpg' in `image_dir`)
            and its annotation ('<name>.mat' in `annot_dir`).
        image_dir: Directory holding the image files.
        annot_dir: Directory holding the annotation files.

    Returns:
        The populated tf.train.Example. For every outline returned by
        `parse_coordinates` the axis-aligned bounding box of its (y, x)
        points is stored, divided by the image size and clamped to [0, 1].
    """
    image_path = os.path.join(image_dir, name + '.jpg')
    annot_path = os.path.join(annot_dir, name + '.mat')
    annot_mat = parse_coordinates(annot_path)
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # NOTE: a leftover debugging `print(encoded_jpg); exit(1)` used to sit
    # here; it terminated the process on every call and left the rest of
    # the function unreachable.
    image = Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size
    filename = name.encode('utf8')
    # check if the image format is matching with your images.
    image_format = b'jpg'
    label = 'hand'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for coord in annot_mat:
        # Extent of the outline; every point is given as (y, x).
        x_max, x_min, y_max, y_min = 0, float('inf'), 0, float('inf')
        for y, x in coord:
            x_max, x_min = max(x, x_max), min(x, x_min)
            y_max, y_min = max(y, y_max), min(y, y_min)
        # Box coordinates are expressed as fractions of the image size, so
        # values falling outside the image are clamped to the [0, 1] range.
        xmins.append(max(float(x_min) / width, 0.0))
        ymins.append(max(float(y_min) / height, 0.0))
        xmaxs.append(min(float(x_max) / width, 1.0))
        ymaxs.append(min(float(y_max) / height, 1.0))
        classes_text.append(label.encode('utf8'))
        classes.append(class_text_to_int(label))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
コード例 #15
0
def create_tf_example(example):
    """Build a tf.train.Example for one image and its in-bounds boxes.

    Args:
        example: Mapping with a 'filename' entry (path of the image to read)
            and an 'annotations' entry, an iterable of box mappings that
            provide 'xmin', 'ymin', 'x_width', 'y_height' and 'class'.

    Returns:
        A tf.train.Example carrying the encoded image bytes, the image size,
        and the normalized boxes with their class names and integer ids
        (ids are looked up in the module-level LABEL_DICT).
    """
    image_path = example['filename']
    encoded_name = image_path.encode()

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    # The image is decoded only to learn the pixel size used to normalize
    # the box coordinates.
    width, height = Image.open(io.BytesIO(encoded_image)).size

    image_format = 'jpg'.encode()

    # One entry per kept box in each of the lists below.
    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []

    for box in example['annotations']:
        # A box reaching past the right or bottom image edge is dropped.
        right = box['xmin'] + box['x_width']
        if right > width:
            continue
        bottom = box['ymin'] + box['y_height']
        if bottom > height:
            continue

        label = box['class']
        xmins.append(float(box['xmin']) / width)
        xmaxs.append(float(right) / width)
        ymins.append(float(box['ymin']) / height)
        ymaxs.append(float(bottom) / height)
        classes_text.append(label.encode())
        classes.append(int(LABEL_DICT[label]))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
コード例 #16
0
def create_tf_example(row):
    """Build a tf.train.Example for a single annotated box.

    The image is read from ``<cwd>/images/<row['filename']>``.

    Args:
        row: Mapping-like record providing 'filename', 'xmin', 'xmax',
            'ymin', 'ymax' and 'class' (presumably one CSV/DataFrame row
            per box -- confirm against the caller).

    Returns:
        A tf.train.Example with the encoded image and exactly one box,
        normalized by the image width and height.
    """
    image_name = row['filename']
    full_path = os.path.join(os.getcwd(), 'images', '{}'.format(image_name))
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # Decode just to obtain the pixel dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    encoded_name = image_name.encode('utf8')
    label = row['class']

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
    }

    # Single-element lists: this variant stores one box per example.
    box_lists = {
        'image/object/bbox/xmin': [row['xmin'] / width],
        'image/object/bbox/xmax': [row['xmax'] / width],
        'image/object/bbox/ymin': [row['ymin'] / height],
        'image/object/bbox/ymax': [row['ymax'] / height],
    }
    class_names = [label.encode('utf8')]
    # class_text_to_int is defined elsewhere in the originating module.
    class_ids = [class_text_to_int(label)]

    for feature_key, values in box_lists.items():
        feature[feature_key] = dataset_util.float_list_feature(values)
    feature['image/object/class/text'] = dataset_util.bytes_list_feature(
        class_names)
    feature['image/object/class/label'] = dataset_util.int64_list_feature(
        class_ids)

    return tf.train.Example(features=tf.train.Features(feature=feature))
コード例 #17
0
def create_tf_feature(image_file_path: pathlib.PosixPath,
                      camera_token: str,
                      corner_list: np.ndarray,
                      image_width: int,
                      image_height: int, boxes: List[Box]) -> tf.train.Example:
    """Build a tf.train.Example for one camera image and its boxes.

    Args:
        image_file_path: Path of the JPEG image to embed.
        camera_token: Identifier stored as 'image/source_id'.
        corner_list: 2-D array with one row per box; columns 0..3 are read
            as xmin, xmax, ymin, ymax and divided by the image width/height.
        image_width: Width used to normalize the x columns.
        image_height: Height used to normalize the y columns.
        boxes: Boxes providing ``name`` and ``token`` attributes; each name
            is mapped to an integer id via the module-level
            ``object_idx_dict``.

    Returns:
        The assembled tf.train.Example.

    Raises:
        ValueError: If the file is not a JPEG image.
    """
    boxes = list(boxes)
    # Build each per-box column directly. The previous zip(*...) transpose
    # produced an empty list for an image without boxes and then failed with
    # IndexError when the columns were looked up by position.
    class_id_list = [object_idx_dict[box.name] for box in boxes]
    classes_text_list = [box.name.encode('utf-8') for box in boxes]
    anns_token_list = [box.token.encode('utf-8') for box in boxes]

    with tf.gfile.GFile(image_file_path.as_posix(), 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Stored under 'image/filename'; this is the full posix path, not just
    # the base name.
    file_path = image_file_path.as_posix()

    corners = np.asarray(corner_list, dtype=float)
    if corners.size == 0:
        # Keep the column slicing below valid when there are no boxes.
        corners = corners.reshape(0, 4)

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(
            file_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(
            camera_token.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(
            corners[:, 0] / float(image_width)),
        'image/object/bbox/xmax': dataset_util.float_list_feature(
            corners[:, 1] / float(image_width)),
        'image/object/bbox/ymin': dataset_util.float_list_feature(
            corners[:, 2] / float(image_height)),
        'image/object/bbox/ymax': dataset_util.float_list_feature(
            corners[:, 3] / float(image_height)),
        'image/object/class/text': dataset_util.bytes_list_feature(
            classes_text_list),
        'image/object/class/label': dataset_util.int64_list_feature(
            class_id_list),
        'image/object/class/anns_id': dataset_util.bytes_list_feature(
            anns_token_list),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature_dict))
コード例 #18
0
def create_tf_example(encoded_image_data, filename, x_min, x_max, y_min, y_max,
                      classes_text, classes):
    """Create a tf.Example proto describing one image with one bounding box.

    NOTE: ``width`` and ``height`` are neither parameters nor locals here;
    they are resolved from the enclosing (module) scope when the function
    runs, so they must be defined there before calling.

    Args:
        encoded_image_data: Object exposing ``tobytes()``; the resulting
            bytes are stored under 'image/encoded'.
        filename: Image name (str); stored encoded as both the filename and
            the source id.
        x_min: Left box edge, divided by ``width``.
        x_max: Right box edge, divided by ``width``.
        y_min: Top box edge, divided by ``height``.
        y_max: Bottom box edge, divided by ``height``.
        classes_text: Class names passed to ``bytes_list_feature`` as-is.
        classes: Class ids passed to ``int64_list_feature`` as-is.

    Returns:
        example: The created tf.Example.
    """
    # Single-box lists of coordinates normalized by the image size.
    left, right = [x_min / width], [x_max / width]
    top, bottom = [y_min / height], [y_max / height]

    feature = {}
    feature['image/height'] = dataset_util.int64_feature(height)
    feature['image/width'] = dataset_util.int64_feature(width)
    feature['image/filename'] = dataset_util.bytes_feature(filename.encode())
    feature['image/source_id'] = dataset_util.bytes_feature(filename.encode())
    feature['image/encoded'] = dataset_util.bytes_feature(
        encoded_image_data.tobytes())
    feature['image/format'] = dataset_util.bytes_feature(b'jpg')
    feature['image/object/bbox/xmin'] = dataset_util.float_list_feature(left)
    feature['image/object/bbox/xmax'] = dataset_util.float_list_feature(right)
    feature['image/object/bbox/ymin'] = dataset_util.float_list_feature(top)
    feature['image/object/bbox/ymax'] = dataset_util.float_list_feature(bottom)
    feature['image/object/class/text'] = dataset_util.bytes_list_feature(
        classes_text)
    feature['image/object/class/label'] = dataset_util.int64_list_feature(
        classes)

    return tf.train.Example(features=tf.train.Features(feature=feature))
コード例 #19
0
def create_tf_example(df, img_id):
    """Build a tf.train.Example for image ``img_id`` with its 'Car' boxes.

    The image is read from ``data/images/<img_id as 6 digits>.png``.

    Args:
        df: Table with 'XMin', 'XMax', 'YMin' and 'YMax' columns, one row per
            box (presumably a pandas DataFrame already filtered to this
            image -- confirm against the caller). Values are divided by the
            image width/height.
        img_id: Integer image id used to format the file name.

    Returns:
        A tf.train.Example in which every box is labelled b'Car' with id 1.
    """
    image_dir = 'data/images/'
    filename = '%06d.png' % img_id
    image_path = os.path.join(image_dir, filename)

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image_data = fid.read()

    # The context manager closes PIL's file handle deterministically instead
    # of leaving that to `del` / garbage collection.
    with Image.open(image_path) as image:
        width, height = image.size

    boxes = np.array(df[['XMin', 'XMax', 'YMin', 'YMax']])
    xmins = [bbox[0] / width for bbox in boxes]
    xmaxs = [bbox[1] / width for bbox in boxes]
    ymins = [bbox[2] / height for bbox in boxes]
    ymaxs = [bbox[3] / height for bbox in boxes]

    # Single-class dataset: one identical label per row of ``df``.
    classes_text = [b'Car'] * len(df)
    classes = [1] * len(df)

    encoded_name = filename.encode()
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(b'png'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
コード例 #20
0
def create_tf_example(f, inputpath):
    """Build a tf.train.Example for image ``f`` found in ``inputpath``.

    No real annotations are read: the example carries a single placeholder
    box with all coordinates 0, labelled 'Human' with class id 1.

    Args:
        f: Image file name. The text before the first '.' is stored as the
            filename / source id (so 'a.b.jpg' is stored as 'a').
        inputpath: Directory that contains ``f``.

    Returns:
        The assembled tf.train.Example.
    """
    # Use one joined path for both reads. Previously the size was read from
    # `inputpath + f`, which only resolved when inputpath ended with a path
    # separator, while the bytes were read from os.path.join(inputpath, f).
    image_path = os.path.join(inputpath, f)

    filename = f.split('.')[0].encode('utf8')
    image_format = b'jpg'  # b'jpeg' or b'png'

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image_data = fid.read()

    # Actual dimensions come from the image itself (the former hard-coded
    # 720x1280 values were always overwritten).
    with Image.open(image_path) as image:
        width, height = image.size

    # Placeholder annotation: one degenerate box per image.
    xmins = [0]
    xmaxs = [0]
    ymins = [0]
    ymaxs = [0]
    classes_text = ['Human'.encode('utf8')]
    classes = [1]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def create_mock_tfrecord():
    """Write a single-record TFRecord file containing a 1x1 PNG image.

    The record holds a 'test_field' float list and the PNG bytes under
    ``standard_fields.TfExampleFields.image_encoded``. The output location
    comes from ``get_mock_tfrecord_path()``, defined elsewhere in the
    originating module.
    """
    # Function-scope import keeps this snippet self-contained.
    import io

    pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8),
                                'RGB')
    # PNG data is binary: io.BytesIO works on Python 2 and 3, whereas the
    # former StringIO.StringIO module only exists on Python 2.
    image_output_stream = io.BytesIO()
    pil_image.save(image_output_stream, format='png')
    encoded_image = image_output_stream.getvalue()

    feature_map = {
        'test_field':
        dataset_util.float_list_feature([1, 2, 3, 4]),
        standard_fields.TfExampleFields.image_encoded:
        dataset_util.bytes_feature(encoded_image),
    }

    tf_example = tf.train.Example(features=tf.train.Features(
        feature=feature_map))
    with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
        writer.write(tf_example.SerializeToString())
コード例 #22
0
ファイル: create_tf_record.py プロジェクト: gourav108/coreml
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
        data: dict holding PASCAL XML fields for a single image (obtained by
            running dataset_util.recursive_parse_xml_to_dict).
        label_map_dict: A map from string label names to integers ids.
        image_subdirectory: Directory joined with data['filename'] to locate
            the image file.
        ignore_difficult_instances: Whether to skip objects whose 'difficult'
            field is set (default: False). Objects without that field are
            treated as not difficult.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image pointed to by data['filename'] is not a
            valid JPEG.
    """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Normalization uses the size declared in the annotation, not the
    # decoded image size.
    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # An image without annotated objects has no 'object' entry; treat it as
    # an empty list instead of failing with KeyError.
    for obj in data.get('object', []):
        # Honor the documented flag, which was previously accepted but
        # never read.
        is_difficult = bool(int(obj.get('difficult', 0)))
        if ignore_difficult_instances and is_difficult:
            continue

        bndbox = obj['bndbox']
        xmin.append(float(bndbox['xmin']) / width)
        ymin.append(float(bndbox['ymin']) / height)
        xmax.append(float(bndbox['xmax']) / width)
        ymax.append(float(bndbox['ymax']) / height)

        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

        # The stored per-object metadata stays fixed, as before: every kept
        # object is written as not difficult, not truncated, unspecified
        # pose, regardless of what the XML says.
        difficult_obj.append(0)
        truncated.append(0)
        poses.append('Unspecified'.encode('utf8'))

    encoded_name = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(encoded_name),
            'image/source_id':
            dataset_util.bytes_feature(encoded_name),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
コード例 #23
0
def create_tf_example(example, path_root, LABEL_DICT):
    """Build a tf.train.Example for one image with a single bounding box.

    Unless ``FLAGS.evaluation_status`` is "test", the image is first cropped
    around the box with a random margin on each side (5% to 20% of the box
    width/height), clamped to the image, and the box is re-expressed
    relative to the crop. The image is always re-encoded as JPEG.

    Args:
        example: Mapping with 'image_name', the pixel box corners 'x_1',
            'y_1', 'x_2', 'y_2', and either 'category_type' (broad
            categories) or 'category_name' / 'category_label' (fine
            categories).
        path_root: Prefix concatenated directly with example['image_name']
            to form the image path.
        LABEL_DICT: Maps a 'category_type' value to its class name; only
            used when ``FLAGS.categories`` is 'broad'.

    Returns:
        The tf.train.Example, or None when the example is filtered out:
        image/crop narrower or shorter than 50 px, a zero-height box, or a
        box width/height ratio outside [0.2, 5].

    Raises:
        AssertionError: If the normalized box falls outside the image.
        ValueError: If ``FLAGS.categories`` is neither 'broad' nor 'fine'.
    """
    f_image = Image.open(path_root + example["image_name"])
    width, height = f_image.size

    # "No crop" defaults. They keep the normalization below uniform and make
    # sure every name used in the diagnostic tuple exists in test mode too
    # (previously a failed check there raised NameError instead).
    _width, _height = width, height
    left, top, right, bottom = 0, 0, width, height

    if FLAGS.evaluation_status != "test":
        box_w = example['x_2'] - example['x_1']
        box_h = example['y_2'] - example['y_1']

        # Draw order (left, top, right, bottom) is kept so a seeded run
        # reproduces the same crops.
        left = example['x_1'] - round((random.random() * 0.15 + 0.05) * box_w)
        top = example['y_1'] - round((random.random() * 0.15 + 0.05) * box_h)
        right = example['x_2'] + round((random.random() * 0.15 + 0.05) * box_w)
        bottom = example['y_2'] + round((random.random() * 0.15 + 0.05) * box_h)

        # Clamp the crop window to the image.
        if left < 0:
            left = 0
        if right >= width:
            right = width
        if top < 0:
            top = 0
        if bottom >= height:
            bottom = height

        f_image = f_image.crop((left, top, right, bottom))
        width, height = f_image.size

    # Re-encode the (possibly cropped) image as JPEG bytes.
    image_buffer = io.BytesIO()
    f_image.save(image_buffer, format='jpeg')
    encoded_image_data = image_buffer.getvalue()

    filename = example["image_name"].encode()
    image_format = 'jpeg'.encode()  # b'jpeg' or b'png'

    # Box relative to the crop origin ((0, 0) in test mode), normalized by
    # the final image size. One box per example.
    xmins = [(example['x_1'] - left) / width]
    xmaxs = [(example['x_2'] - left) / width]
    ymins = [(example['y_1'] - top) / height]
    ymaxs = [(example['y_2'] - top) / height]

    # Explicit raise rather than `assert` so the check survives `python -O`;
    # the exception type and payload are unchanged.
    if not ((xmins[0] >= 0.) and (xmaxs[0] < 1.01)
            and (ymins[0] >= 0.) and (ymaxs[0] < 1.01)):
        raise AssertionError(
            (example, _width, _height, width, height, left, right, top,
             bottom, xmins, xmaxs, ymins, ymaxs))

    if width < 50 or height < 50:
        return None
    box_height = ymaxs[0] - ymins[0]
    if box_height == 0:
        # Degenerate box: filter it like other unusable examples instead of
        # dividing by zero below.
        return None
    aspect_ratio = (xmaxs[0] - xmins[0]) / box_height
    if aspect_ratio < 0.2 or aspect_ratio > 5.:
        return None

    if FLAGS.categories == 'broad':
        classes_text = [LABEL_DICT[example['category_type']].encode()]
        classes = [example['category_type']]
    elif FLAGS.categories == 'fine':
        classes_text = [example['category_name'].encode()]
        classes = [example['category_label']]
    else:
        raise (ValueError(
            "Incorrect value for flag categories. Must be 'broad' or 'fine'."))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(example, height=600, width=800):
    """Build a tf.train.Example for one image with fixed, known dimensions.

    The image is not decoded; its size is taken from the ``height`` and
    ``width`` arguments, whose defaults are the 600x800 values this snippet
    previously hard-coded for the Udacity simulator data set.

    Args:
        example: Mapping with a 'filename' entry (path of the image to read)
            and an 'annotations' entry, an iterable of box mappings that
            provide 'xmin', 'ymin', 'x_width', 'y_height' and 'class'.
        height: Image height used to normalize y coordinates.
        width: Image width used to normalize x coordinates.

    Returns:
        A tf.train.Example with the encoded image and all boxes; class ids
        are looked up in the module-level LABEL_DICT.
    """
    image_path = example['filename']
    filename = image_path.encode()

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode()

    # One entry per box in each list.
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for box in example['annotations']:
        # Convert to float BEFORE dividing: float(a / b) truncates to 0 for
        # integer inputs under Python 2 division semantics.
        xmins.append(float(box['xmin']) / width)
        xmaxs.append(float(box['xmin'] + box['x_width']) / width)
        ymins.append(float(box['ymin']) / height)
        ymaxs.append(float(box['ymin'] + box['y_height']) / height)
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
コード例 #25
0
def sub_img_to_tf_example(img_name, image, instanceImg):
    """Build a tf.train.Example with boxes and PNG masks for one image.

    Every non-zero value in ``instanceImg`` is treated as one instance,
    encoded as ``class_id * 1000 + instance_id``. For each instance a binary
    mask (PNG) and the tight bounding box around it are stored.

    Args:
        img_name: Name stored as filename / source id and used in log lines.
        image: PIL image; it is re-encoded as JPEG and its width/height are
            used to normalize the boxes.
        instanceImg: Instance-id image convertible with ``np.asarray``; it
            must contain the value 0 as its smallest value.

    Returns:
        The tf.train.Example, or None when no instance passed the filters.
    """
    encoded_image = io.BytesIO()
    image.save(encoded_image, format='JPEG')
    key = hashlib.sha256(encoded_image.getvalue()).hexdigest()

    iimg_np = np.asarray(instanceImg).copy()
    iimg_vals = np.unique(iimg_np)
    assert (len(iimg_vals) > 0 and iimg_vals[0] == 0)
    # np.unique sorts, so dropping the first value drops the 0 entry.
    instances = iimg_vals[1:]

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    masks = []
    for (i, j) in enumerate(instances):
        # Reset per instance so the except handler below never reads an
        # unbound name or a stale value from a previous iteration.
        x_mask = y_mask = None
        try:
            # images are encoded (id * 1000) + instance
            inst_id = j % 1000
            inst_class = int((j - inst_id) / 1000)

            mask_bin = (iimg_np == j)
            mask = mask_bin.astype(np.uint8) * 2  # now mask is 0 or 2

            # First mask pixel as (x, y), used as the flood-fill seed.
            mask_first_pixel = tuple(
                np.column_stack(np.where(mask == 2))[0][::-1])

            # in almost all cases this will just fill the single connected
            # mask with ones
            cv2.floodFill(mask,
                          None,
                          mask_first_pixel,
                          1,
                          flags=8 | cv2.FLOODFILL_FIXED_RANGE)

            # BUT in a few cases an additional, unconnected portion of the
            # mask remains (still 2 after the fill).
            # np.all replaces np.alltrue, which was removed in NumPy 2.0.
            if not np.all(mask <= 1):
                tf.logging.log(
                    tf.logging.WARN, '%02i/%02i (%s) has a split mask' %
                    (i, inst_class, img_name))

                if FLAGS.vmasks:
                    cv2.imshow('image', mask * 255)
                    keyb = cv2.waitKey(0)

                    if keyb == 27:
                        sys.exit()

                    cv2.destroyAllWindows()

                    # NOTE(review): a split mask is only skipped when
                    # FLAGS.vmasks is set; otherwise it is kept. Confirm
                    # this is intended.
                    continue

            output = io.BytesIO()
            # encode the mask as png
            mask_png = Image.fromarray(mask)
            mask_png.save(output, format='PNG')

            # calculate a box around the mask
            indices_x = np.any(iimg_np == j, axis=0)
            indices_y = np.any(iimg_np == j, axis=1)
            x_mask = np.where(indices_x)
            y_mask = np.where(indices_y)

            xmin = np.min(x_mask)
            xmax = np.max(x_mask)
            ymin = np.min(y_mask)
            ymax = np.max(y_mask)

            x_fraction = (xmax - xmin) / image.width
            y_fraction = (ymax - ymin) / image.height
            area = x_fraction * y_fraction

            if area < FLAGS.min_area:
                # Same message either way; a non-positive area is logged at
                # ERROR level, a merely small one at WARN.
                level = tf.logging.WARN if area > 0 else tf.logging.ERROR
                tf.logging.log(
                    level,
                    '%02i/%02i (%s) has area < treshold => %02.7f < %02.7f'
                    % (i, inst_class, img_name, area, FLAGS.min_area))
                continue

            masks.append(output.getvalue())
            # `float` replaces the np.float alias removed in NumPy 1.24.
            xmins.append(xmin.astype(float) / image.width)
            xmaxs.append(xmax.astype(float) / image.width)
            ymins.append(ymin.astype(float) / image.height)
            ymaxs.append(ymax.astype(float) / image.height)

            classes.append(inst_class)
            # class_mappings is defined elsewhere in the originating module.
            classes_text.append(class_mappings[inst_class].encode('utf8'))

            tf.logging.log(
                tf.logging.DEBUG,
                '%02i: (%04i,%04i), (%04i,%04i)' % (i, xmin, ymin, xmax, ymax))

        except ValueError:
            # `image.height` was misspelled here, which turned the handled
            # ValueError into an AttributeError.
            tf.logging.warn(
                "%s (%ix%i): %02i instances/#%02i having invalid mask (not an instance):\nx-vals: %s \ny-vals: %s\n"
                % (img_name, image.width, image.height, len(instances) - 1, i,
                   x_mask, y_mask))
            continue

    # this image has no considerable boxes at all
    if len(classes) == 0:
        return None

    if FLAGS.debug:
        tf.logging.debug("%s: %02i instances used" % (img_name, len(masks)))

    feature_dict = {
        'image/width': dataset_util.int64_feature(image.width),
        'image/height': dataset_util.int64_feature(image.height),
        'image/filename': dataset_util.bytes_feature(img_name.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(img_name.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image.getvalue()),
        'image/format': dataset_util.bytes_feature('jpg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/mask': dataset_util.bytes_list_feature(masks)
    }

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
コード例 #26
0
def parse_example(f, images_path):
    """Convert the next record of a face-annotation listing into a tf.train.Example.

    A record, as consumed here, is: one line with the image path (relative to
    ``images_path``), one line with the number of faces, then that many
    whitespace-separated annotation lines whose first four fields are read as
    ``x y w h`` in pixels of the original image.

    Only boxes wider than 25 px and taller than 30 px (measured on the
    original image) are kept. The image is optionally resized to a
    ``config.RESIZE`` square and is always re-encoded as JPEG before being
    stored in the example.

    Args:
      f: Open text file positioned at the start of a record.
      images_path: Directory the image paths in the listing are relative to.

    Returns:
      The populated tf.train.Example, or None when no box passes the size
      filter.

    Raises:
      FileNameIsNone: The file-name line is empty.
      FaceNumIsNone: The face count is zero.
      IOError: The image is missing, cannot be decoded or cannot be
        re-encoded as JPEG.
      ValueError: The face-count line or a box field is not numeric.
      Exception: An annotation line is empty.
    """
    image_format = b'jpeg'

    filename = f.readline().rstrip()
    if not filename:
        raise FileNameIsNone()

    filepath = os.path.join(images_path, filename)
    if not os.path.isfile(filepath):
        raise IOError('Image file not found: %s' % filepath)

    face_num = int(f.readline().rstrip())
    if not face_num:
        raise FaceNumIsNone()

    raw_all_annot = []
    for _ in range(face_num):
        annot = f.readline().rstrip().split()
        if not annot:
            raise Exception('Empty annotation line for %s' % filename)
        raw_all_annot.append(annot)

    image_raw = cv2.imread(filepath)
    if image_raw is None:
        raise IOError('Could not decode image: %s' % filepath)
    original_height, original_width = image_raw.shape[:2]

    if config.RESIZE:
        image_raw = cv2.resize(image_raw, (config.RESIZE, config.RESIZE))

    is_success, buffer = cv2.imencode(".jpg", image_raw)
    if not is_success:
        raise IOError('Could not encode image as JPEG: %s' % filepath)
    encoded_image_data = buffer.tobytes()
    # No content hash is computed; the sha256 feature is stored empty.
    key = ''

    height, width = image_raw.shape[:2]

    # float() keeps the ratio exact even where `/` is integer division.
    scale_w = float(width) / original_width
    scale_h = float(height) / original_height

    xmins = []  # Normalized left x coordinate (1 per box)
    xmaxs = []  # Normalized right x coordinate (1 per box)
    ymins = []  # Normalized top y coordinate (1 per box)
    ymaxs = []  # Normalized bottom y coordinate (1 per box)
    classes_text = []  # Class name (1 per box)
    classes = []  # Integer class id (1 per box)
    poses = []
    truncated = []
    difficult_obj = []

    for annot in raw_all_annot:
        box_w = float(annot[2])
        box_h = float(annot[3])
        # Skip faces that are too small in the original image.
        if not (box_w > 25.0 and box_h > 30.0):
            continue
        box_x = float(annot[0])
        box_y = float(annot[1])

        # Some annotations exceed the image boundary, so every coordinate is
        # clamped to stay strictly inside the [0, 1] range.
        xmins.append(max(0.005, (box_x * scale_w) / width))
        ymins.append(max(0.005, (box_y * scale_h) / height))
        xmaxs.append(min(0.995, ((box_x + box_w) * scale_w) / width))
        ymaxs.append(min(0.995, ((box_y + box_h) * scale_h) / height))
        classes_text.append(b'face')
        classes.append(1)
        poses.append("front".encode('utf8'))
        truncated.append(0)
        # One flag per box; the list feature below needs an iterable.
        difficult_obj.append(0)

    if not classes:
        return None

    encoded_boxes = ','.join(
        str(value) for value in xmins + xmaxs + ymins + ymaxs)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(int(height)),
            'image/width':
            dataset_util.int64_feature(int(width)),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf-8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf-8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/encoded':
            dataset_util.bytes_feature(encoded_boxes.encode('utf-8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))

    return tf_example
コード例 #27
0
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

    Rows whose ``LabelName`` is not in ``label_map`` are dropped. Of the
    remaining rows, those with a non-null ``YMin`` are used as box
    annotations and those with a null ``YMin`` as image-level labels.

    Args:
      annotations_data_frame: Data frame containing the annotations for a
        single image. The ``ImageID`` of its first row identifies the image,
        so it must contain at least one row.
      label_map: String to integer label map.
      encoded_image: The encoded image string.

    Returns:
      The populated tf.train.Example. When no row matches ``label_map`` the
      example is still returned, with empty annotation lists.
    """
    fields = standard_fields.TfExampleFields

    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]
    missing_box = filtered_data_frame.YMin.isnull()
    boxes = filtered_data_frame[~missing_box]
    labels = filtered_data_frame[missing_box]
    image_id = annotations_data_frame.ImageID.iloc[0]

    def float_column(column):
        # `.values` works on every pandas version (as_matrix() was removed);
        # builtin float replaces the removed np.float alias.
        return dataset_util.float_list_feature(column.values.astype(float))

    feature_map = {
        fields.object_bbox_ymin:
        float_column(boxes.YMin),
        fields.object_bbox_xmin:
        float_column(boxes.XMin),
        fields.object_bbox_ymax:
        float_column(boxes.YMax),
        fields.object_bbox_xmax:
        float_column(boxes.XMax),
        fields.object_class_text:
        dataset_util.bytes_list_feature(
            boxes.LabelName.map(lambda x: x.encode()).values),
        fields.object_class_label:
        dataset_util.int64_list_feature(
            boxes.LabelName.map(
                lambda x: label_map[x]).values.astype(np.int64)),
        fields.filename:
        dataset_util.bytes_feature('{}.jpg'.format(image_id).encode()),
        fields.source_id:
        dataset_util.bytes_feature(image_id.encode()),
        fields.image_encoded:
        dataset_util.bytes_feature(encoded_image),
    }

    # Optional per-box flag columns. The field attribute is only looked up
    # when the corresponding column is actually present.
    optional_flags = (
        ('IsGroupOf', 'object_group_of'),
        ('IsOccluded', 'object_occluded'),
        ('IsTruncated', 'object_truncated'),
        ('IsDepiction', 'object_depiction'),
    )
    for column, field_name in optional_flags:
        if column in filtered_data_frame.columns:
            feature_map[getattr(fields, field_name)] = (
                dataset_util.int64_list_feature(
                    boxes[column].values.astype(int)))

    if 'ConfidenceImageLabel' in labels.columns:
        feature_map[fields.image_class_label] = (
            dataset_util.int64_list_feature(
                labels.LabelName.map(
                    lambda x: label_map[x]).values.astype(np.int64)))
        # Encode to bytes like object_class_text above, and store the Feature
        # itself (a trailing comma here used to wrap it in a tuple).
        feature_map[fields.image_class_text] = (
            dataset_util.bytes_list_feature(
                labels.LabelName.map(lambda x: x.encode()).values))

    return tf.train.Example(features=tf.train.Features(feature=feature_map))
コード例 #28
0
def create_tf_example(name, img_dir, ann_dir):
    """Build a tf.train.Example for one JPEG image and its ``.mat`` annotations.

    Every entry returned by ``coords_from_mat`` is reduced to its
    axis-aligned bounding box, normalized by the image size, clipped to
    [0, 1] and labelled ``'hand'``.

    Args:
      name: Base name shared by ``<name>.jpg`` in ``img_dir`` and
        ``<name>.mat`` in ``ann_dir``.
      img_dir: Directory containing the image.
      ann_dir: Directory containing the annotation file.

    Returns:
      The populated tf.train.Example.

    Raises:
      ValueError: The image file is not a JPEG.
    """
    img_filename = '%s.jpg' % name
    ann_filename = '%s.mat' % name
    img_full_path = os.path.join(img_dir, img_filename)
    ann_full_path = os.path.join(ann_dir, ann_filename)

    with tf.gfile.GFile(img_full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    label = 'hand'
    width, height = image.size
    # Encode once; encoding the resulting bytes again fails on Python 3.
    encoded_filename = img_filename.encode('utf8')

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    for coord in coords_from_mat(ann_full_path):
        # Bounding box of the points; each point unpacks as (y, x).
        x_max, x_min, y_max, y_min = 0, float('inf'), 0, float('inf')
        for y, x in coord:
            x_max, x_min = max(x, x_max), min(x, x_min)
            y_max, y_min = max(y, y_max), min(y, y_min)

        xmin.append(max(float(x_min) / width, 0.0))
        ymin.append(max(float(y_min) / height, 0.0))
        xmax.append(min(float(x_max) / width, 1.0))
        ymax.append(min(float(y_max) / height, 1.0))
        classes_text.append(label.encode('utf8'))
        classes.append(label_map_dict[label])
        truncated.append(0)
        poses.append('Frontal'.encode('utf8'))
        difficult_obj.append(0)

    return tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_filename),
        'image/source_id': dataset_util.bytes_feature(encoded_filename),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
      }))
def dict_to_tf_example(data_dict, dataset_directory):
    """Turn one annotation dict into a tf.train.Example.

    The image named by ``data_dict['filename']`` is read as-is when that path
    exists, otherwise it is looked up inside ``dataset_directory``. JPEG
    images are stored unchanged; BMP images are converted through
    ``bmpToJpg`` and the temporary JPEG is deleted after it has been read.

    Args:
      data_dict: Annotation record. ``'filename'`` is required;
        ``'source_id'`` defaults to the file name; ``'objects'`` is an
        optional list of dicts with ``xmin``/``ymin``/``xmax``/``ymax``
        (pixels), ``text`` and ``label``, plus optional ``difficult``,
        ``truncated`` and ``pose``.
      dataset_directory: Fallback directory for locating the image.

    Returns:
      The populated tf.train.Example, with box coordinates divided by the
      image width/height.

    Raises:
      ValueError: The image is neither JPEG nor BMP.
    """

    def load(path):
        # Raw file bytes plus a PIL image opened over those bytes.
        with tf.io.gfile.GFile(path, 'rb') as handle:
            raw = handle.read()
        return raw, PIL.Image.open(io.BytesIO(raw))

    image_path = data_dict['filename']
    if not Path(image_path).exists():
        # for label image tools
        image_path = os.path.join(dataset_directory, image_path)

    encoded_jpg, image = load(image_path)

    if image.format == "BMP":
        tmp_jpeg_path = bmpToJpg(dataset_directory, data_dict['filename'])
        encoded_jpg, image = load(tmp_jpeg_path)
        os.remove(tmp_jpeg_path)  # drop the generated temporary file
    elif image.format != 'JPEG':
        raise ValueError('Image format not JPEG or BMP')

    width, height = image.width, image.height
    filename = data_dict['filename']
    source_id = data_dict.get('source_id', filename)
    digest = hashlib.sha256(encoded_jpg).hexdigest()
    image_format = 'jpeg'

    left, top, right, bottom = [], [], [], []
    label_ids, label_names = [], []
    truncated_flags, views, difficult_flags = [], [], []

    # NOTE: the original author's comment here says at least one object is
    # required; the loop itself also accepts a missing or empty list.
    for entry in data_dict.get('objects', []):
        left.append(float(entry['xmin']) / width)
        top.append(float(entry['ymin']) / height)
        right.append(float(entry['xmax']) / width)
        bottom.append(float(entry['ymax']) / height)
        label_names.append(entry['text'].encode('utf8'))
        label_ids.append(entry['label'])
        difficult_flags.append(entry.get('difficult', 0))
        truncated_flags.append(entry.get('truncated', 0))
        views.append(entry.get('pose', 'Unspecified').encode('utf8'))

    features = {}
    features['image/height'] = dataset_util.int64_feature(height)
    features['image/width'] = dataset_util.int64_feature(width)
    features['image/filename'] = dataset_util.bytes_feature(
        filename.encode('utf8'))
    features['image/source_id'] = dataset_util.bytes_feature(
        source_id.encode('utf8'))
    features['image/key/sha256'] = dataset_util.bytes_feature(
        digest.encode('utf8'))
    features['image/encoded'] = dataset_util.bytes_feature(encoded_jpg)
    features['image/format'] = dataset_util.bytes_feature(
        image_format.encode('utf8'))
    features['image/object/bbox/xmin'] = dataset_util.float_list_feature(left)
    features['image/object/bbox/xmax'] = dataset_util.float_list_feature(right)
    features['image/object/bbox/ymin'] = dataset_util.float_list_feature(top)
    features['image/object/bbox/ymax'] = dataset_util.float_list_feature(bottom)
    features['image/object/class/text'] = dataset_util.bytes_list_feature(
        label_names)
    features['image/object/class/label'] = dataset_util.int64_list_feature(
        label_ids)
    features['image/object/difficult'] = dataset_util.int64_list_feature(
        difficult_flags)
    features['image/object/truncated'] = dataset_util.int64_list_feature(
        truncated_flags)
    features['image/object/view'] = dataset_util.bytes_list_feature(views)

    return tf.train.Example(features=tf.train.Features(feature=features))
コード例 #30
0
def dict_to_tf_example(data,
                       label_map_dict,
                       example,
                       ignore_difficult_instances=False):
    """Build a tf.train.Example for one annotated image, returning 0 on failure.

    Args:
      data: Annotation dict. The code reads ``data['size']['width']``,
        ``data['size']['height']``, ``data['filename']`` and iterates
        ``data['object']``, taking ``xmin``/``ymin``/``xmax``/``ymax`` from
        each entry's ``'bndbox'``.
      label_map_dict: Maps a class name to its integer id.
      example: A file path; only its directory and base name (extension
        stripped) are used, to locate ``<dir>/<base>.jpg``. The name is later
        rebound to the tf.train.Example that is built.
      ignore_difficult_instances: Not used by the code visible here.

    Returns:
      The tf.train.Example on success, or the integer 0 when any step raises
      or the image is not a JPEG.
      NOTE(review): when the image file does not exist (``signal == 1``) no
      return statement is reached in the code visible here - confirm what
      callers expect in that case.
    """
    try:
        dirname = os.path.dirname(example)
        basename = os.path.basename(example)
        filename = os.path.splitext(basename)[0]
        img_path = '{}.jpg'.format(os.path.join(dirname, filename))
        # The name of the image's parent directory is used as the class name
        # for every box of this image.
        object_name = img_path.split('/')[-2]
    except:
        print('error')
        return 0
    try:
        if os.path.isfile(img_path):
            with tf.gfile.GFile(img_path, 'rb') as fid:
                encoded_jpg = fid.read()
            encoded_jpg_io = io.BytesIO(encoded_jpg)
            image = PIL.Image.open(encoded_jpg_io)
            # signal: 0 = image loaded, 1 = image file missing.
            signal = 0
        else:
            signal = 1
    except:
        print('error')
        return 0
    if signal == 0:
        if image.format != 'JPEG':
            print('image format is not jpeg')
            return 0
        else:
            try:
                # Image size comes from the annotation dict, not from the
                # decoded image.
                width = int(data['size']['width'])
                height = int(data['size']['height'])
                xmin = []
                ymin = []
                xmax = []
                ymax = []
                classes = []
                classes_text = []
                for obj in data['object']:
                    # Box coordinates are normalized by the annotated size.
                    xmin.append(float(obj['bndbox']['xmin']) / width)
                    ymin.append(float(obj['bndbox']['ymin']) / height)
                    xmax.append(float(obj['bndbox']['xmax']) / width)
                    ymax.append(float(obj['bndbox']['ymax']) / height)
                    class_name = object_name
                    # NOTE(review): appended without .encode(); presumably
                    # relies on str being bytes - verify on Python 3.
                    classes_text.append(class_name)
                    classes.append(label_map_dict[class_name])
            except:
                # Any missing key, bad number or unknown class name ends up
                # here and is reported only through the 0 return value.
                return 0
        try:
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height':
                    dataset_util.int64_feature(height),
                    'image/width':
                    dataset_util.int64_feature(width),
                    'image/filename':
                    dataset_util.bytes_feature(data['filename'].encode(
                        'utf8')),
                    'image/source_id':
                    dataset_util.bytes_feature(data['filename'].encode(
                        'utf8')),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_jpg),
                    'image/format':
                    dataset_util.bytes_feature(b'jpg'),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmin),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmax),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymin),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymax),
                    'image/object/class/text':
                    dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
                }))
            return example
        except:
            return 0