# Shared imports for the create_tf_example variants below. Project-specific
# helpers (dataset_util, category_id_filter, trans_id, reverse_trans_id,
# wmli, wmlu, and the USE_INDEX_IN_FILE / RECORD_IMG_SIZE flags) are assumed
# to be provided elsewhere in the repository.
import hashlib
import io
import os

import numpy as np
import PIL.Image
import tensorflow as tf
from pycocotools import mask


def create_tf_example(image, labels, points, img_file, id):
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    if USE_INDEX_IN_FILE:
        # File names look like "<prefix>_<index>"; recover the index.
        file_index = int(filename[filename.find("_") + 1:])
    else:
        file_index = id
    with tf.gfile.GFile(img_file, 'rb') as fid:
        encoded_jpg = fid.read()

    xs = []
    ys = []
    category_ids = []
    num_annotations_skipped = 0
    print("ann size:", len(labels))
    for label, point in zip(labels, points):
        x, y = point
        category_id = int(label)
        if not category_id_filter(category_id):
            num_annotations_skipped += 1
            continue
        # Normalize keypoint coordinates to [0, 1].
        xs.append(float(x) / image_width)
        ys.append(float(y) / image_height)
        category_ids.append(category_id)

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/file_index': dataset_util.int64_feature(file_index),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/point/x': dataset_util.float_list_feature(xs),
        'image/object/point/y': dataset_util.float_list_feature(ys),
        'image/object/class/label': dataset_util.int64_list_feature(category_ids),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example, num_annotations_skipped
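
# A minimal usage sketch for the keypoint variant above, assuming the
# TF1-style API already used in this file (tf.gfile / tf.python_io). The
# samples iterable and the write_point_record name are illustrative, not
# part of the original code.
def write_point_record(output_path, samples):
    # samples: iterable of (image_dict, labels, points, img_file, id) tuples.
    total_skipped = 0
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for image, labels, points, img_file, idx in samples:
            example, num_skipped = create_tf_example(image, labels, points,
                                                     img_file, idx)
            total_skipped += num_skipped
            writer.write(example.SerializeToString())
    print("skipped annotations:", total_skipped)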
def create_tf_example(image, annotations):
    image_height = image['img_height']
    image_width = image['img_width']
    img_path = image['img_path']
    if RECORD_IMG_SIZE is None:
        with tf.gfile.GFile(img_path, 'rb') as fid:
            encoded_jpg = fid.read()
    else:
        # Re-encode the image at a fixed record size.
        img = wmli.imread(img_path)
        img = wmli.resize_img(img, RECORD_IMG_SIZE, keep_aspect_ratio=True)
        encoded_jpg = wmli.encode_img(img)

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_ids = []
    for l, box in annotations:
        # Boxes are taken as-is in [ymin, xmin, ymax, xmax] order, i.e. they
        # are assumed to be already normalized to [0, 1].
        xmin.append(box[1])
        xmax.append(box[3])
        ymin.append(box[0])
        ymax.append(box[2])
        is_crowd.append(False)
        category_ids.append(l)
    if len(xmin) == 0:
        return None

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label': dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
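
# A sketch of a matching parse function for the record layout above, using
# TF1 feature parsing. The keys mirror feature_dict exactly; the function
# name and the choice to return dense tensors are assumptions.
def parse_bbox_example(serialized):
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_single_example(serialized, keys_to_features)
    image = tf.image.decode_jpeg(parsed['image/encoded'], channels=3)
    # Reassemble boxes in the same [ymin, xmin, ymax, xmax] order used above.
    boxes = tf.stack(
        [tf.sparse_tensor_to_dense(parsed['image/object/bbox/' + k])
         for k in ('ymin', 'xmin', 'ymax', 'xmax')], axis=1)
    labels = tf.sparse_tensor_to_dense(parsed['image/object/class/label'])
    return image, boxes, labels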
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
         u'date_captured', u'flickr_url', u'id']
      annotations_list: list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox',
         u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner. This function converts
        to the format expected by the Tensorflow Object Detection API (which
        is [ymin, xmin, ymax, xmax] with coordinates normalized relative to
        image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed by the
        'id' field of each category. See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.

    Returns:
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG
    """
    global src_file_index
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # encoded_jpg_io = io.BytesIO(encoded_jpg)
    # image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    repeat_nr = 1
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        # category_id is the key of ID_TO_TEXT
        category_id = int(object_annotations['category_id'])
        category_id = trans_id(category_id)
        if not category_id_filter(category_id):
            num_annotations_skipped += 1
            continue
        # Convert the absolute [x, y, width, height] box to normalized
        # corner coordinates.
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(
            category_index[reverse_trans_id(category_id)]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            # Decode the COCO RLE/polygon segmentation into a PNG-encoded
            # binary mask.
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

    if len(category_ids) == 0:
        return None, None, None, None

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label': dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
        'image/file_index': dataset_util.int64_feature(src_file_index),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))

    src_file_index += 1
    return key, example, num_annotations_skipped, repeat_nr
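
# A sketch of a conversion driver for the COCO variant above, assuming a
# standard COCO instances JSON. The plain-dict grouping stands in for
# pycocotools' COCO index; function and argument names are illustrative.
import collections
import json

def convert_coco_records(annotations_file, image_dir, output_path,
                         category_index, include_masks=False):
    # src_file_index is the module-level counter used by the variant above;
    # it is assumed to be initialized (e.g. src_file_index = 0) beforehand.
    with tf.gfile.GFile(annotations_file, 'r') as fid:
        groundtruth = json.load(fid)
    anns_by_image = collections.defaultdict(list)
    for ann in groundtruth['annotations']:
        anns_by_image[ann['image_id']].append(ann)

    total_skipped = 0
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for image in groundtruth['images']:
            key, example, num_skipped, _ = create_tf_example(
                image, anns_by_image[image['id']], image_dir,
                category_index, include_masks=include_masks)
            if example is None:
                continue
            total_skipped += num_skipped
            writer.write(example.SerializeToString())
    print("skipped annotations:", total_skipped)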
def create_tf_example(image, annotations_list, img_file, id):
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    if USE_INDEX_IN_FILE:
        file_index = int(filename[filename.find("_") + 1:])
    else:
        file_index = id
    with tf.gfile.GFile(img_file, 'rb') as fid:
        encoded_jpg = fid.read()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    category_ids = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    print("ann size:", len(annotations_list))
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        if not category_id_filter(category_id):
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        category_ids.append(category_id)
        # Here 'segmentation' is expected to already hold a decoded binary
        # mask array (unlike the COCO RLE handled in the variant above).
        binary_mask = object_annotations["segmentation"]
        # cv2.imwrite(wmlu.home_dir("x.jpg"), binary_mask * 255)
        pil_image = PIL.Image.fromarray(binary_mask)
        output_io = io.BytesIO()
        pil_image.save(output_io, format='PNG')
        encoded_mask_png.append(output_io.getvalue())

    # for test
    # if len(xmin) == 0:
    #     return None, None

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/file_index': dataset_util.int64_feature(file_index),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label': dataset_util.int64_list_feature(category_ids),
        'image/object/mask': dataset_util.bytes_list_feature(encoded_mask_png),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example, num_annotations_skipped
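
# Every variant above leans on the TF Object Detection API's dataset_util
# helpers. If that package is not available, drop-in equivalents can be
# sketched in a few lines (this mirrors the upstream helpers; treat it as a
# stand-in, not the canonical implementation):
def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def int64_list_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def bytes_list_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

def float_list_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))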