def create_tf_example(group, path):
    """Build a tf.train.Example from one image and its grouped box rows.

    Args:
      group: object with a `filename` attribute and an `object` DataFrame
        holding one row per bounding box (columns 'xmin', 'xmax', 'ymin',
        'ymax', 'class' — absolute pixel coordinates).
      path: directory containing the image file named by `group.filename`.

    Returns:
      A tf.train.Example holding the encoded JPEG plus normalized boxes.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins, xmaxs = [], []
    ymins, ymaxs = [], []
    classes_text, classes = [], []
    for _, row in group.object.iterrows():
        # Normalize absolute pixel coordinates to [0, 1].
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature('0'.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def _make_fake_tfrecord(self):
    """Write one synthetic 512x512 example to a temporary TFRecord file.

    Returns:
      Path to the written .tfrecords file.
    """
    tfrecord_path = os.path.join(tempfile.mkdtemp(), 'test.tfrecords')
    # A constant all-ones image is enough to exercise the decode path.
    encoded_jpg = tf.io.encode_jpeg(tf.ones([512, 512, 3], dtype=tf.uint8))
    feature = {
        'image/height': tfrecord_util.int64_feature(512),
        'image/width': tfrecord_util.int64_feature(512),
        'image/filename':
            tfrecord_util.bytes_feature('test_file_name.jpg'.encode('utf8')),
        'image/source_id':
            tfrecord_util.bytes_feature('123456'.encode('utf8')),
        'image/key/sha256':
            tfrecord_util.bytes_feature('qwdqwfw12345'.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_jpg.numpy()),
        'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': tfrecord_util.float_list_feature([0.1]),
        'image/object/bbox/xmax': tfrecord_util.float_list_feature([0.1]),
        'image/object/bbox/ymin': tfrecord_util.float_list_feature([0.2]),
        'image/object/bbox/ymax': tfrecord_util.float_list_feature([0.2]),
        'image/object/class/text':
            tfrecord_util.bytes_list_feature(['test'.encode('utf8')]),
        'image/object/class/label': tfrecord_util.int64_list_feature([1]),
        'image/object/difficult': tfrecord_util.int64_list_feature([]),
        'image/object/truncated': tfrecord_util.int64_list_feature([]),
        'image/object/view': tfrecord_util.bytes_list_feature([]),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    # BUG FIX: the writer was previously never closed, so the record could
    # still be buffered (not flushed to disk) when the returned path was
    # read back. The context manager guarantees close+flush.
    with tf.io.TFRecordWriter(tfrecord_path) as writer:
        writer.write(example.SerializeToString())
    return tfrecord_path
def create_tf_example(img_path,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      category_df=None,
                      caption_annotations=None,
                      include_masks=False):
    """Converts an image and its annotations to a tf.Example proto.

    Args:
      img_path: path to the image file on disk.
      image_dir: directory containing the image files (unused here —
        `img_path` is treated as a full path; kept for interface
        compatibility).
      bbox_annotations: list of dicts with keys "XMin", "XMax", "YMin",
        "YMax" and "LabelName" (an integer category id). The coordinates
        are written out unscaled, so they are assumed to already be
        normalized to [0, 1] — TODO confirm against the annotation source.
      category_index: dict of category info keyed by the 'id' field of
        each category (currently unused).
      category_df: optional categories dataframe (currently unused).
      caption_annotations: list of dicts with keys [u'id', u'image_id',
        u'str'] (currently unused).
      include_masks: Whether to include instance segmentation masks
        (PNG encoded) in the result. default: False.

    Returns:
      example: The converted tf.Example.
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored (always 0 in the current implementation).
    """
    # Read the encoded bytes once and parse only the header for the size.
    # The original additionally decoded the whole image with cv2.imread
    # just to obtain its dimensions — a second full decode — and
    # cv2.imread returns None on a missing/unreadable file, which crashed
    # with an opaque AttributeError instead of a file error.
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size
    image_height = height
    image_width = width
    filename = p.basename(img_path)
    image_id = _get_img_id(img_path)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    feature_dict = {
        'image/height': tfrecord_util.int64_feature(image_height),
        'image/width': tfrecord_util.int64_feature(image_width),
        'image/filename': tfrecord_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
        'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
    }

    num_annotations_skipped = 0
    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        encoded_mask_png = []
        for object_annotations in bbox_annotations:
            xmin.append(object_annotations["XMin"])
            xmax.append(object_annotations["XMax"])
            ymin.append(object_annotations["YMin"])
            ymax.append(object_annotations["YMax"])
            is_crowd.append(False)
            category_id = int(object_annotations['LabelName'])
            category_ids.append(category_id)
            # invClassDict: module-level id -> class-name mapping.
            category_names.append(invClassDict[category_id].encode('utf8'))
            # Area in absolute pixels, derived from the normalized box.
            area.append(
                height * width *
                (object_annotations["XMax"] - object_annotations["XMin"]) *
                (object_annotations["YMax"] - object_annotations["YMin"]))

            if include_masks:
                # NOTE(review): 'segmentation' and 'iscrowd' are COCO-style
                # keys that the dicts consumed above do not show — confirm
                # before enabling include_masks for this dataset.
                run_len_encoding = mask.frPyObjects(
                    object_annotations['segmentation'], image_height,
                    image_width)
                binary_mask = mask.decode(run_len_encoding)
                if not object_annotations['iscrowd']:
                    binary_mask = np.amax(binary_mask, axis=2)
                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format='PNG')
                encoded_mask_png.append(output_io.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
            'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
            'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
            'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
            'image/object/class/text':
                tfrecord_util.bytes_list_feature(category_names),
            'image/object/class/label':
                tfrecord_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
                tfrecord_util.int64_list_feature(is_crowd),
            'image/object/area': tfrecord_util.float_list_feature(area),
        })
        if include_masks:
            feature_dict['image/object/mask'] = (
                tfrecord_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example, num_annotations_skipped
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages',
                       ann_json_dict=None):
    """Convert a single-object annotation dict to a tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      data: dict holding the fields for a single image; expected keys are
        'filename', 'class' and the absolute box corners 'x1', 'y1',
        'x2', 'y2'.
      dataset_directory: directory holding the image named
        data['filename'].
      label_map_dict: A map from string label names to integer ids.
      ignore_difficult_instances: unused in this format; kept for
        interface compatibility.
      image_subdirectory: unused in this format; kept for interface
        compatibility.
      ann_json_dict: optional COCO-style annotation json dictionary;
        image and annotation entries are appended to it when given.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a
        valid JPEG.
    """
    full_path = os.path.join(dataset_directory, data['filename'])
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Size is taken from the decoded image header, not the annotation.
    width, height = image.size
    image_id = get_image_id(data['filename'])

    if ann_json_dict:
        image_entry = {
            'file_name': data['filename'],
            'height': height,
            'width': width,
            'id': image_id,
        }
        ann_json_dict['images'].append(image_entry)

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    # This annotation format carries exactly one object per image and has
    # no 'difficult' flag, so it is recorded as not-difficult.
    difficult = False
    difficult_obj.append(int(difficult))
    xmin.append(float(data['x1']) / width)
    ymin.append(float(data['y1']) / height)
    xmax.append(float(data['x2']) / width)
    ymax.append(float(data['y2']) / height)
    classes_text.append(data['class'].encode('utf8'))
    classes.append(label_map_dict[data['class']])
    truncated.append(int(0))
    poses.append(POSE.encode('utf8'))

    if ann_json_dict:
        # BUG FIX: this branch previously read data['xmin']..data['ymax'],
        # keys that do not exist in this format (the normalization above
        # uses 'x1'/'y1'/'x2'/'y2'), so enabling ann_json_dict raised
        # KeyError. Use the same keys as the normalized coordinates.
        abs_xmin = int(data['x1'])
        abs_ymin = int(data['y1'])
        abs_xmax = int(data['x2'])
        abs_ymax = int(data['y2'])
        abs_width = abs_xmax - abs_xmin
        abs_height = abs_ymax - abs_ymin
        ann = {
            'area': abs_width * abs_height,
            'iscrowd': 0,
            'image_id': image_id,
            'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
            'category_id': label_map_dict[data['class']],
            'id': get_ann_id(),
            'ignore': 0,
            'segmentation': [],
        }
        ann_json_dict['annotations'].append(ann)

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': tfrecord_util.int64_feature(height),
        'image/width': tfrecord_util.int64_feature(width),
        'image/filename': tfrecord_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/source_id': tfrecord_util.bytes_feature(
            str(image_id).encode('utf8')),
        'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
        'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
        'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
        'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
        'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
        'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
        'image/object/difficult':
            tfrecord_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            tfrecord_util.int64_list_feature(truncated),
        'image/object/view': tfrecord_util.bytes_list_feature(poses),
    }))
    return example
def dict_to_tf_example(
    data,
    dataset_directory,
    label_map_dict,
    ignore_difficult_instances=False,
    image_subdirectory="JPEGImages",
    ann_json_dict=None,
):
    """Convert a PASCAL-VOC XML-derived dict to a tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running tfrecord_util.recursive_parse_xml_to_dict).
      dataset_directory: Path to root directory holding the PASCAL dataset.
      label_map_dict: A map from string label names to integer ids.
      ignore_difficult_instances: Whether to skip difficult instances in
        the dataset (default: False).
      image_subdirectory: Subdirectory within the PASCAL dataset directory
        holding the actual image data.
      ann_json_dict: optional annotation json dictionary; image and
        annotation entries are appended to it when given.

    Returns:
      example: The converted tf.Example.
    """
    img_path = os.path.join(image_subdirectory, data["filename"])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, "rb") as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    # The strict JPEG-format check is intentionally disabled here; images
    # of other formats pass through unmodified.
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Dimensions come from the XML annotation, not the decoded image.
    width = int(data["size"]["width"])
    height = int(data["size"]["height"])
    image_id = get_image_id(data["filename"])

    if ann_json_dict:
        image = {
            "file_name": data["filename"],
            "height": height,
            "width": width,
            "id": image_id,
        }
        ann_json_dict["images"].append(image)

    xmin, ymin, xmax, ymax = [], [], [], []
    area = []
    classes, classes_text = [], []
    truncated, poses, difficult_obj = [], [], []

    for obj in data.get("object", []):
        difficult = bool(int(obj["difficult"]))
        if ignore_difficult_instances and difficult:
            continue
        difficult_obj.append(int(difficult))
        box = obj["bndbox"]
        xmin.append(float(box["xmin"]) / width)
        ymin.append(float(box["ymin"]) / height)
        xmax.append(float(box["xmax"]) / width)
        ymax.append(float(box["ymax"]) / height)
        # Normalized-box area (fraction of the image).
        area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
        classes_text.append(obj["name"].encode("utf8"))
        classes.append(label_map_dict[obj["name"]])
        truncated.append(int(obj["truncated"]))
        poses.append(obj["pose"].encode("utf8"))

        if ann_json_dict:
            abs_xmin = int(box["xmin"])
            abs_ymin = int(box["ymin"])
            abs_xmax = int(box["xmax"])
            abs_ymax = int(box["ymax"])
            abs_width = abs_xmax - abs_xmin
            abs_height = abs_ymax - abs_ymin
            ann = {
                "area": abs_width * abs_height,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [abs_xmin, abs_ymin, abs_width, abs_height],
                "category_id": label_map_dict[obj["name"]],
                "id": get_ann_id(),
                "ignore": 0,
                "segmentation": [],
            }
            ann_json_dict["annotations"].append(ann)

    feature = {
        "image/height": tfrecord_util.int64_feature(height),
        "image/width": tfrecord_util.int64_feature(width),
        "image/filename": tfrecord_util.bytes_feature(
            data["filename"].encode("utf8")
        ),
        "image/source_id": tfrecord_util.bytes_feature(
            str(image_id).encode("utf8")
        ),
        "image/key/sha256": tfrecord_util.bytes_feature(key.encode("utf8")),
        "image/encoded": tfrecord_util.bytes_feature(encoded_jpg),
        "image/format": tfrecord_util.bytes_feature("jpeg".encode("utf8")),
        "image/object/bbox/xmin": tfrecord_util.float_list_feature(xmin),
        "image/object/bbox/xmax": tfrecord_util.float_list_feature(xmax),
        "image/object/bbox/ymin": tfrecord_util.float_list_feature(ymin),
        "image/object/bbox/ymax": tfrecord_util.float_list_feature(ymax),
        "image/object/area": tfrecord_util.float_list_feature(area),
        "image/object/class/text": tfrecord_util.bytes_list_feature(
            classes_text
        ),
        "image/object/class/label": tfrecord_util.int64_list_feature(classes),
        "image/object/difficult": tfrecord_util.int64_list_feature(
            difficult_obj
        ),
        "image/object/truncated": tfrecord_util.int64_list_feature(truncated),
        "image/object/view": tfrecord_util.bytes_list_feature(poses),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ann_json_dict=None):
    """Convert a WIDER-face annotation dict to a tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      data: one entry from wider_annotations[SET]; must provide 'path',
        'bbox' (a sequence of [x1, y1, x2, y2] boxes) and 'poses'.
      dataset_directory: Path to root directory holding the WIDER dataset.
      label_map_dict: A map from string label names to integer ids; must
        contain the 'face' label.
      ann_json_dict: optional annotation json dictionary; image and
        annotation entries are appended to it when given.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['path'] is not a valid
        JPEG.
    """
    img_path = os.path.join(data['path'])
    full_path = os.path.join(dataset_directory, img_path)
    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(image.width)
    height = int(image.height)
    image_id = get_image_id()

    if ann_json_dict:
        image = {
            'file_name': data['path'],
            'height': height,
            'width': width,
            'id': image_id,
        }
        ann_json_dict['images'].append(image)

    xmin, ymin, xmax, ymax = [], [], [], []
    area = []
    classes, classes_text = [], []
    truncated, poses, difficult_obj = [], [], []

    for idx, box in enumerate(data['bbox']):
        # WIDER carries no difficulty flag; every face is not-difficult.
        difficult = False
        difficult_obj.append(int(difficult))
        xmin.append(float(box[0]) / width)
        ymin.append(float(box[1]) / height)
        xmax.append(float(box[2]) / width)
        ymax.append(float(box[3]) / height)
        # Normalized-box area (fraction of the image).
        area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
        classes_text.append('face'.encode('utf8'))
        classes.append(label_map_dict['face'])
        truncated.append(False)
        poses.append(str(data['poses'][idx]).encode('utf8'))

        if ann_json_dict:
            abs_xmin = int(box[0])
            abs_ymin = int(box[1])
            abs_xmax = int(box[2])
            abs_ymax = int(box[3])
            abs_width = abs_xmax - abs_xmin
            abs_height = abs_ymax - abs_ymin
            ann = {
                'area': abs_width * abs_height,
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                'category_id': label_map_dict['face'],
                'id': get_ann_id(),
                'ignore': 0,
                'segmentation': [],
            }
            ann_json_dict['annotations'].append(ann)

    feature = {
        'image/height': tfrecord_util.int64_feature(height),
        'image/width': tfrecord_util.int64_feature(width),
        'image/filename':
            tfrecord_util.bytes_feature(str(data['path']).encode('utf8')),
        'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
        'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
        'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
        'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
        'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
        'image/object/area': tfrecord_util.float_list_feature(area),
        'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
        'image/object/difficult':
            tfrecord_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            tfrecord_util.int64_list_feature(truncated),
        'image/object/view': tfrecord_util.bytes_list_feature(poses),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def dict_to_tf_example(data, image_path, label_map_dict):
    """Convert an XML-derived dict plus an image path to a tf.Example.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running tfrecord_util.recursive_parse_xml_to_dict).
      image_path: path of the raw image file.
      label_map_dict: A map from string label names to integer ids.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by image_path is not a valid
        JPEG.
    """
    full_path = os.path.join(image_path)
    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Dimensions come from the XML annotation, not the decoded image.
    width = int(data['size']['width'])
    height = int(data['size']['height'])
    image_id = get_image_id(data['filename'])

    xmin, ymin, xmax, ymax = [], [], [], []
    area = []
    classes, classes_text = [], []
    truncated, poses, difficult_obj = [], [], []

    for annotation in data.get('object', []):
        difficult = bool(int(annotation['difficult']))
        difficult_obj.append(int(difficult))
        box = annotation['bndbox']
        xmin.append(float(box['xmin']) / width)
        ymin.append(float(box['ymin']) / height)
        xmax.append(float(box['xmax']) / width)
        ymax.append(float(box['ymax']) / height)
        # Normalized-box area (fraction of the image).
        area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
        classes_text.append(annotation['name'].encode('utf8'))
        classes.append(label_map_dict[annotation['name']])
        truncated.append(int(annotation['truncated']))
        poses.append(annotation['pose'].encode('utf8'))

    feature = {
        'image/height': tfrecord_util.int64_feature(height),
        'image/width': tfrecord_util.int64_feature(width),
        'image/filename':
            tfrecord_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
        'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
        'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
        'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
        'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
        'image/object/area': tfrecord_util.float_list_feature(area),
        'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
        'image/object/difficult':
            tfrecord_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            tfrecord_util.int64_list_feature(truncated),
        'image/object/view': tfrecord_util.bytes_list_feature(poses),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def dict_to_tf_example(img_path: str,
                       base_dir: str,
                       bboxes: list,
                       label_map_dict,
                       ann_json_dict=None):
    """Convert an image plus comma-separated box strings to a tf.Example.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      img_path: The path (or relative to base dir) of the image.
      base_dir: The directory where all data sets are stored.
      bboxes: A list of all bounding boxes in that image, each a string
        of the form "x1,y1,x2,y2,label".
      label_map_dict: A map from string label names to integer ids.
      ann_json_dict: optional annotation json dictionary; image and
        annotation entries are appended to it when given.

    Returns:
      example: The converted tf.Example.
    """
    full_path = os.path.join(base_dir, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_img = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_img)
    image = PIL.Image.open(encoded_jpg_io)
    width, height = image.size
    key = hashlib.sha256(encoded_img).hexdigest()
    image_id = get_image_id(img_path)

    if ann_json_dict:
        image = {
            'file_name': img_path,
            'height': height,
            'width': width,
            'id': image_id,
        }
        ann_json_dict['images'].append(image)

    xmin, ymin, xmax, ymax = [], [], [], []
    classes, classes_text = [], []

    for raw_box in bboxes:
        # Each entry is "x1,y1,x2,y2,label".
        parts = raw_box.split(',')
        xmin.append(float(parts[0]) / width)
        ymin.append(float(parts[1]) / height)
        xmax.append(float(parts[2]) / width)
        ymax.append(float(parts[3]) / height)
        classes_text.append(parts[4].encode('utf8'))
        classes.append(label_map_dict[parts[4]])

        if ann_json_dict:
            abs_xmin = int(parts[0])
            abs_ymin = int(parts[1])
            abs_xmax = int(parts[2])
            abs_ymax = int(parts[3])
            abs_width = abs_xmax - abs_xmin
            abs_height = abs_ymax - abs_ymin
            ann = {
                'area': abs_width * abs_height,
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                'category_id': label_map_dict[parts[4]],
                'id': get_ann_id(),
                'ignore': 0,
                'segmentation': [],
            }
            ann_json_dict['annotations'].append(ann)

    feature = {
        'image/height': tfrecord_util.int64_feature(height),
        'image/width': tfrecord_util.int64_feature(width),
        'image/filename':
            tfrecord_util.bytes_feature(img_path.encode('utf8')),
        'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_img),
        'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
        'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
        'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
        'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
        'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts a COCO image and its annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url',
        u'height', u'width', u'date_captured', u'flickr_url', u'id']
      annotations_list: list of dicts with keys: [u'segmentation',
        u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset
        are given as [x, y, width, height] tuples using absolute
        coordinates where x, y represent the top-left (0-indexed) corner.
        This function converts to the format expected by the Tensorflow
        Object Detection API (which is [ymin, xmin, ymax, xmax] with
        coordinates normalized relative to image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed by
        the 'id' field of each category. See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.

    Returns:
      key: SHA-256 hex digest of the encoded image bytes.
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a
        valid JPEG
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.compat.v1.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Skip degenerate or out-of-bounds boxes.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

    feature_dict = {
        'image/height': tfrecord_util.int64_feature(image_height),
        'image/width': tfrecord_util.int64_feature(image_width),
        'image/filename': tfrecord_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
        'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
        'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
        'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
        'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
        'image/object/class/text':
            tfrecord_util.bytes_list_feature(category_names),
        # BUG FIX: category_ids was collected above but never serialized —
        # the 'image/object/class/label' feature (present in every sibling
        # converter in this file) was missing from the output.
        'image/object/class/label':
            tfrecord_util.int64_list_feature(category_ids),
        'image/object/is_crowd': tfrecord_util.int64_list_feature(is_crowd),
        'image/object/area': tfrecord_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            tfrecord_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return key, example, num_annotations_skipped
def dict_to_tf_example(data,
                       dataset_directory,
                       filepath,
                       camera_settings,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages',
                       visibility_thresh=0.1,
                       ann_json_dict=None):
    """Convert an NDDS/FAT-style annotation dict to a tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      data: dict holding the fields for a single image; objects are read
        from data['objects'], each with a 'bounding_box' carrying
        'top_left'/'bottom_right' pairs (indexed as [y, x] here — TODO
        confirm against the exporter), a 'class' and a 'visibility'.
      dataset_directory: Path to root directory holding the dataset
        (unused here; `filepath` is treated as a full path).
      filepath: path of the annotation file; the image is expected next to
        it with a '.jpg' extension.
      camera_settings: dict providing
        camera_settings['captured_image_size'] with 'width' and 'height'.
      label_map_dict: A map from string label names to integer ids.
      ignore_difficult_instances: unused; kept for interface compatibility.
      image_subdirectory: unused; kept for interface compatibility.
      visibility_thresh: objects with visibility above this threshold are
        flagged as truncated.
      ann_json_dict: optional annotation json dictionary; image and
        annotation entries are appended to it when given.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image next to `filepath` is not a valid JPEG.
    """
    # BUG FIX: filepath.split('.')[0] truncated at the FIRST dot anywhere
    # in the path (e.g. './data/x.json' -> '' + '.jpg'); splitext only
    # strips the final extension.
    img_path = os.path.splitext(filepath)[0] + '.jpg'
    with tf.io.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Dimensions come from the camera settings, not the decoded image.
    width = int(camera_settings['captured_image_size']['width'])
    height = int(camera_settings['captured_image_size']['height'])
    # (A dead hand-rolled image_id derived from the path was removed; it
    # was immediately overwritten by get_image_id.)
    image_id = get_image_id(img_path)

    if ann_json_dict:
        image_entry = {
            'file_name': img_path,
            'height': height,
            'width': width,
            'id': image_id,
        }
        ann_json_dict['images'].append(image_entry)

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    area = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    if 'objects' in data:
        for obj in data['objects']:
            difficult_obj.append(0)
            # Corners are indexed [1] for x and [0] for y.
            xmin.append(float(obj['bounding_box']['top_left'][1]) / width)
            ymin.append(float(obj['bounding_box']['top_left'][0]) / height)
            xmax.append(float(obj['bounding_box']['bottom_right'][1]) / width)
            ymax.append(float(obj['bounding_box']['bottom_right'][0]) / height)
            # Normalized-box area (fraction of the image).
            area.append((xmax[-1] - xmin[-1]) * (ymax[-1] - ymin[-1]))
            classes_text.append(obj['class'].encode('utf8'))
            classes.append(label_map_dict[obj['class']])
            visibility = obj['visibility']
            truncated.append(int(visibility > visibility_thresh))
            poses.append('Frontal'.encode('utf8'))

            if ann_json_dict:
                abs_xmin = int(obj['bounding_box']['top_left'][1])
                abs_ymin = int(obj['bounding_box']['top_left'][0])
                abs_xmax = int(obj['bounding_box']['bottom_right'][1])
                abs_ymax = int(obj['bounding_box']['bottom_right'][0])
                abs_width = abs_xmax - abs_xmin
                abs_height = abs_ymax - abs_ymin
                ann = {
                    'area': abs_width * abs_height,
                    'iscrowd': 0,
                    'image_id': image_id,
                    'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                    'category_id': label_map_dict[obj['class']],
                    'id': get_ann_id(),
                    'ignore': 0,
                    'segmentation': [],
                }
                ann_json_dict['annotations'].append(ann)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/height': tfrecord_util.int64_feature(height),
                'image/width': tfrecord_util.int64_feature(width),
                'image/filename':
                    tfrecord_util.bytes_feature(img_path.encode('utf8')),
                'image/source_id':
                    tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
                'image/key/sha256':
                    tfrecord_util.bytes_feature(key.encode('utf8')),
                'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
                'image/format':
                    tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/bbox/xmin':
                    tfrecord_util.float_list_feature(xmin),
                'image/object/bbox/xmax':
                    tfrecord_util.float_list_feature(xmax),
                'image/object/bbox/ymin':
                    tfrecord_util.float_list_feature(ymin),
                'image/object/bbox/ymax':
                    tfrecord_util.float_list_feature(ymax),
                'image/object/area': tfrecord_util.float_list_feature(area),
                'image/object/class/text':
                    tfrecord_util.bytes_list_feature(classes_text),
                'image/object/class/label':
                    tfrecord_util.int64_list_feature(classes),
                'image/object/difficult':
                    tfrecord_util.int64_list_feature(difficult_obj),
                'image/object/truncated':
                    tfrecord_util.int64_list_feature(truncated),
                'image/object/view':
                    tfrecord_util.bytes_list_feature(poses),
            }))
    return example