def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      caption_annotations=None,
                      include_masks=False):
    """Build a tf.Example for one COCO image and its annotations.

    COCO boxes arrive as absolute ``[x, y, width, height]`` with ``(x, y)``
    the top-left corner.  They are stored here as separate xmin / xmax /
    ymin / ymax lists, each divided by the image width or height taken from
    ``image``.

    Args:
        image: dict describing the image.  The keys read are 'height',
            'width', 'file_name' and 'id'.
        image_dir: directory containing the image files.
        bbox_annotations: optional list of dicts.  The keys read are 'bbox',
            'iscrowd', 'category_id', 'area' and, when ``include_masks`` is
            set, 'segmentation'.
        category_index: mapping from category id to a dict holding a 'name'
            entry.  It is indexed for every box that is kept, so it must be
            supplied whenever a valid box is present.
        caption_annotations: optional list of dicts; only the 'caption' key
            is read.
        include_masks: when true, a PNG-encoded instance mask is stored for
            every kept box under 'image/object/mask'.

    Returns:
        A ``(key, example, num_annotations_skipped)`` tuple, where ``key`` is
        the hex SHA-256 digest of the encoded image bytes, ``example`` is the
        tf.train.Example and ``num_annotations_skipped`` counts the boxes
        that were rejected.

    Note:
        The image bytes are handed to ``PIL.Image.open`` but the reported
        format is never inspected, so this function itself does not raise
        ValueError for non-JPEG data; 'image/format' is always written as
        'jpeg'.
    """
    img_h = image['height']
    img_w = image['width']
    file_name = image['file_name']
    source_id = image['id']

    with tf.gfile.GFile(os.path.join(image_dir, file_name), 'rb') as fid:
        raw_bytes = fid.read()
    # The opened image is not used afterwards; the call is kept so that PIL
    # still gets to inspect the bytes exactly as before.
    PIL.Image.open(io.BytesIO(raw_bytes))
    key = hashlib.sha256(raw_bytes).hexdigest()

    feature_dict = {}
    feature_dict['image/height'] = tfrecord_util.int64_feature(img_h)
    feature_dict['image/width'] = tfrecord_util.int64_feature(img_w)
    feature_dict['image/filename'] = tfrecord_util.bytes_feature(
        file_name.encode('utf8'))
    feature_dict['image/source_id'] = tfrecord_util.bytes_feature(
        str(source_id).encode('utf8'))
    feature_dict['image/key/sha256'] = tfrecord_util.bytes_feature(
        key.encode('utf8'))
    feature_dict['image/encoded'] = tfrecord_util.bytes_feature(raw_bytes)
    feature_dict['image/format'] = tfrecord_util.bytes_feature(
        'jpeg'.encode('utf8'))

    num_annotations_skipped = 0
    if bbox_annotations:
        left, right, top, bottom = [], [], [], []
        crowd_flags, label_names, label_ids, areas = [], [], [], []
        mask_pngs = []

        for ann in bbox_annotations:
            x, y, box_w, box_h = ann['bbox']
            # Reject degenerate boxes and boxes that extend past the right
            # or bottom edge of the image.
            if (box_w <= 0 or box_h <= 0
                    or x + box_w > img_w or y + box_h > img_h):
                num_annotations_skipped += 1
                continue

            left.append(float(x) / img_w)
            right.append(float(x + box_w) / img_w)
            top.append(float(y) / img_h)
            bottom.append(float(y + box_h) / img_h)
            crowd_flags.append(ann['iscrowd'])

            label_id = int(ann['category_id'])
            label_ids.append(label_id)
            label_names.append(category_index[label_id]['name'].encode('utf8'))
            areas.append(ann['area'])

            if include_masks:
                # `mask` is presumably pycocotools.mask -- confirm against
                # the file's imports.
                rle = mask.frPyObjects(ann['segmentation'], img_h, img_w)
                binary_mask = mask.decode(rle)
                if not ann['iscrowd']:
                    # Non-crowd masks are reduced over their last axis.
                    binary_mask = np.amax(binary_mask, axis=2)
                png_buffer = io.BytesIO()
                PIL.Image.fromarray(binary_mask).save(png_buffer, format='PNG')
                mask_pngs.append(png_buffer.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin': tfrecord_util.float_list_feature(left),
            'image/object/bbox/xmax': tfrecord_util.float_list_feature(right),
            'image/object/bbox/ymin': tfrecord_util.float_list_feature(top),
            'image/object/bbox/ymax': tfrecord_util.float_list_feature(bottom),
            'image/object/class/text':
                tfrecord_util.bytes_list_feature(label_names),
            'image/object/class/label':
                tfrecord_util.int64_list_feature(label_ids),
            'image/object/is_crowd':
                tfrecord_util.int64_list_feature(crowd_flags),
            'image/object/area': tfrecord_util.float_list_feature(areas),
        })
        if include_masks:
            feature_dict['image/object/mask'] = (
                tfrecord_util.bytes_list_feature(mask_pngs))

    if caption_annotations:
        captions = [
            entry['caption'].encode('utf8') for entry in caption_annotations
        ]
        feature_dict['image/caption'] = tfrecord_util.bytes_list_feature(
            captions)

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return key, example, num_annotations_skipped
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages',
                       ann_json_dict=None):
    """Turn a PASCAL-VOC style annotation dict into a tf.Example.

    Box coordinates from ``data`` are divided by the image width / height
    declared in ``data['size']`` before being stored.

    Args:
        data: dict holding the XML fields of a single image.  The keys read
            are 'folder', 'filename', 'size' (with 'width' and 'height') and
            optionally 'object'.
        dataset_directory: root directory of the dataset.
        label_map_dict: mapping from label name to integer id.
        ignore_difficult_instances: when true, objects flagged as difficult
            are left out.
        image_subdirectory: sub-directory, below ``data['folder']``, that
            holds the image files.
        ann_json_dict: optional dict with 'images' and 'annotations' lists.
            When truthy, an image record and one annotation record per kept
            object are appended to it.

    Returns:
        The converted tf.train.Example.

    Raises:
        ValueError: if PIL reports a format other than 'JPEG' for the file.
    """
    relative_path = os.path.join(
        data['folder'], image_subdirectory, data['filename'])
    absolute_path = os.path.join(dataset_directory, relative_path)
    with tf.gfile.GFile(absolute_path, 'rb') as fid:
        encoded_jpg = fid.read()

    pil_image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if pil_image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    size = data['size']
    width = int(size['width'])
    height = int(size['height'])
    image_id = get_image_id(data['filename'])

    if ann_json_dict:
        ann_json_dict['images'].append({
            'file_name': data['filename'],
            'height': height,
            'width': width,
            'id': image_id,
        })

    xmin, ymin, xmax, ymax = [], [], [], []
    area, classes, classes_text = [], [], []
    truncated, poses, difficult_obj = [], [], []

    objects = data['object'] if 'object' in data else []
    for obj in objects:
        is_difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and is_difficult:
            continue
        difficult_obj.append(int(is_difficult))

        box = obj['bndbox']
        x_lo = float(box['xmin']) / width
        y_lo = float(box['ymin']) / height
        x_hi = float(box['xmax']) / width
        y_hi = float(box['ymax']) / height
        xmin.append(x_lo)
        ymin.append(y_lo)
        xmax.append(x_hi)
        ymax.append(y_hi)
        # Area is computed from the normalized corners, not from pixels.
        area.append((x_hi - x_lo) * (y_hi - y_lo))

        name = obj['name']
        class_id = label_map_dict[name]
        classes_text.append(name.encode('utf8'))
        classes.append(class_id)
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

        if ann_json_dict:
            # The JSON record keeps absolute integer pixel coordinates in
            # [x, y, width, height] order.
            abs_xmin = int(box['xmin'])
            abs_ymin = int(box['ymin'])
            abs_width = int(box['xmax']) - abs_xmin
            abs_height = int(box['ymax']) - abs_ymin
            ann_json_dict['annotations'].append({
                'area': abs_width * abs_height,
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                'category_id': class_id,
                'id': get_ann_id(),
                'ignore': 0,
                'segmentation': [],
            })

    feature = {
        'image/height': tfrecord_util.int64_feature(height),
        'image/width': tfrecord_util.int64_feature(width),
        'image/filename':
            tfrecord_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
        'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
        'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
        'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
        'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
        'image/object/area': tfrecord_util.float_list_feature(area),
        'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
        'image/object/difficult':
            tfrecord_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            tfrecord_util.int64_list_feature(truncated),
        'image/object/view': tfrecord_util.bytes_list_feature(poses),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example
def json_to_tf_example(example, label_map_dict, ann_json_dict=None):
    """Turn one JSON annotation file and its sibling image into a tf.Example.

    The image is looked up next to the annotation file, using the same path
    with the extension replaced by '.jpg'.  The JSON layout ('imagePath',
    'shapes', 'label', 'points') appears to follow the labelme format --
    confirm against the tool that produced the files.

    Args:
        example: path of the JSON annotation file.
        label_map_dict: mapping from label name to integer id.  Shapes whose
            label is not a key of this mapping are skipped.
        ann_json_dict: optional dict with 'images' and 'annotations' lists.
            When truthy, an image record and one annotation record per kept
            shape are appended to it.

    Returns:
        The converted tf.train.Example.

    Raises:
        ValueError: if PIL reports a format other than 'JPEG' for the image.
    """
    jpg_path = os.path.splitext(example)[0] + '.jpg'
    with tf.gfile.GFile(jpg_path, 'rb') as fid:
        encoded_jpg = fid.read()

    pil_image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if pil_image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    # SHA-256 checksum of the encoded image bytes.
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Unlike the XML path, the size comes from the decoded image header.
    width = pil_image.width
    height = pil_image.height

    # NOTE: no explicit encoding is passed, so the platform default is used.
    with open(example, 'r') as f:
        data = json.load(f)

    image_id = get_image_id(data['imagePath'])
    if ann_json_dict:
        ann_json_dict['images'].append({
            'file_name': data['imagePath'],
            'height': height,
            'width': width,
            'id': image_id,
        })

    xmin, ymin, xmax, ymax = [], [], [], []
    classes, classes_text = [], []

    shapes = data['shapes'] if 'shapes' in data else []
    for shape in shapes:
        label = shape['label']
        # Ignore labels that are not listed in the label map.
        if label not in label_map_dict:
            continue

        # Convert the shape's points into a bounding box.
        x0, y0, x1, y1 = _get_boundingbox(shape['points'])
        xmin.append(x0 / width)
        ymin.append(y0 / height)
        xmax.append(x1 / width)
        ymax.append(y1 / height)

        # Labels are UTF-8 encoded so that non-ASCII (e.g. Chinese) class
        # names are supported.
        class_id = label_map_dict[label]
        classes_text.append(label.encode('utf-8'))
        classes.append(class_id)

        if ann_json_dict:
            # The JSON record keeps truncated integer coordinates in
            # [x, y, width, height] order.
            abs_xmin = int(x0)
            abs_ymin = int(y0)
            abs_width = int(x1) - abs_xmin
            abs_height = int(y1) - abs_ymin
            ann_json_dict['annotations'].append({
                'area': abs_width * abs_height,
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [abs_xmin, abs_ymin, abs_width, abs_height],
                'category_id': class_id,
                'id': get_ann_id(),
                'ignore': 0,
                'segmentation': [],
            })

    feature = {
        'image/height': tfrecord_util.int64_feature(height),
        'image/width': tfrecord_util.int64_feature(width),
        'image/filename':
            tfrecord_util.bytes_feature(data['imagePath'].encode('utf8')),
        'image/source_id':
            tfrecord_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': tfrecord_util.bytes_feature(key.encode('utf8')),
        'image/encoded': tfrecord_util.bytes_feature(encoded_jpg),
        'image/format': tfrecord_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': tfrecord_util.float_list_feature(xmin),
        'image/object/bbox/xmax': tfrecord_util.float_list_feature(xmax),
        'image/object/bbox/ymin': tfrecord_util.float_list_feature(ymin),
        'image/object/bbox/ymax': tfrecord_util.float_list_feature(ymax),
        'image/object/class/text':
            tfrecord_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            tfrecord_util.int64_list_feature(classes),
    }
    tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
    return tf_example
def transfer_one_block(self, tfrecord_fn, block_sampled_datas, raw_vertex_num):
    """Serialize one sampled block into a single-record TFRecord file.

    The per-element arrays in ``block_sampled_datas`` are grouped according
    to ``self.eles_sorted``, concatenated along the last axis, passed through
    ``Raw_To_Tfrecord.check_types`` and written as raw bytes features.

    Args:
        tfrecord_fn: path of the TFRecord file to write.
        block_sampled_datas: mapping from element name to array.
        raw_vertex_num: not used by this method.
    """
    from tfrecord_util import bytes_feature, int64_feature

    if not hasattr(self, 'eles_sorted'):
        self.sort_eles(block_sampled_datas.keys())

    # Gather the arrays of every group and join them along the last axis.
    # A group with no members keeps an empty list.
    dls = {}
    for group in self.eles_sorted:
        parts = [block_sampled_datas[e] for e in self.eles_sorted[group]]
        # Any dtype promotion needed happens inside np.concatenate.
        dls[group] = np.concatenate(parts, -1) if len(parts) > 0 else parts

    # Pad face_i up to self.num_face rows by repeating its first row; the
    # original row count is stored later as 'valid_num_face'.
    if 'face_i' in dls:
        face_shape = dls['face_i'].shape
        tile_num = self.num_face - face_shape[0]
        assert tile_num>=0, "face num > buffer: {}>{}".format(face_shape[0], self.num_face)
        padding = np.tile(dls['face_i'][0:1, :], [tile_num, 1])
        dls['face_i'] = np.concatenate([dls['face_i'], padding], 0)

    # Convert to a tf.Example.
    dls = Raw_To_Tfrecord.check_types(dls)
    if not hasattr(self, 'ele_idxs'):
        self.record_shape_idx(block_sampled_datas, dls)
        print(self.ele_idxs)

    max_category = np.max(ele_in_feature(dls, 'label_category', self.ele_idxs))
    assert max_category < self.dataset_meta.num_classes, "max_category {} > {}".format(\
        max_category, self.dataset_meta.num_classes)

    # vertex_f and vertex_uint8 are always written; vertex_i and face_i only
    # when present.
    features_map = {
        'vertex_f': bytes_feature(dls['vertex_f'].tobytes()),
        'vertex_uint8': bytes_feature(dls['vertex_uint8'].tobytes()),
    }
    if 'vertex_i' in dls:
        features_map['vertex_i'] = bytes_feature(dls['vertex_i'].tobytes())
    if 'face_i' in dls:
        features_map['face_i'] = bytes_feature(dls['face_i'].tobytes())
        features_map['valid_num_face'] = int64_feature(face_shape[0])

    example = tf.train.Example(features=tf.train.Features(feature=features_map))

    with tf.python_io.TFRecordWriter(tfrecord_fn) as raw_tfrecord_writer:
        raw_tfrecord_writer.write(example.SerializeToString())

    # Progress is reported only when self.fi is a multiple of five.
    if self.fi %5 ==0:
        print('{}/{} write tfrecord OK: {}'.format(self.fi, self.fn, tfrecord_fn))
def create_tf_example(
    image,
    image_dir,
    bbox_annotations=None,
    category_index=None,
    caption_annotations=None,
):
    """Build a tf.Example for one COCO image and its annotations.

    COCO boxes arrive as absolute ``[x, y, width, height]`` with ``(x, y)``
    the top-left corner.  They are stored here as separate xmin / xmax /
    ymin / ymax lists, each divided by the image width or height taken from
    ``image``.  The box features are written even when there are no
    annotations, in which case every list is empty.

    Args:
        image: dict describing the image.  The keys read are "height",
            "width", "file_name" and "id".
        image_dir: directory containing the image files.
        bbox_annotations: optional list of dicts.  The keys read are "bbox",
            "iscrowd", "category_id" and "area".
        category_index: mapping from category id to a dict holding a "name"
            entry.  It is indexed for every box that is kept, so it must be
            supplied whenever a valid box is present.
        caption_annotations: optional list of dicts; only the "caption" key
            is read.

    Returns:
        A ``(key, example, num_annotations_skipped)`` tuple, where ``key`` is
        the hex SHA-256 digest of the encoded image bytes, ``example`` is the
        tf.train.Example and ``num_annotations_skipped`` counts the boxes
        that were rejected.

    Note:
        The image bytes are handed to ``PIL.Image.open`` but the reported
        format is never inspected, so this function itself does not raise
        ValueError for non-JPEG data; "image/format" is always written as
        "jpeg".
    """
    img_h = image["height"]
    img_w = image["width"]
    file_name = image["file_name"]
    source_id = image["id"]

    with tf.io.gfile.GFile(os.path.join(image_dir, file_name), "rb") as fid:
        raw_bytes = fid.read()
    # The opened image is not used afterwards; the call is kept so that PIL
    # still gets to inspect the bytes exactly as before.
    PIL.Image.open(io.BytesIO(raw_bytes))
    key = hashlib.sha256(raw_bytes).hexdigest()

    feature_dict = {}
    feature_dict["image/height"] = tfrecord_util.int64_feature(img_h)
    feature_dict["image/width"] = tfrecord_util.int64_feature(img_w)
    feature_dict["image/filename"] = tfrecord_util.bytes_feature(
        file_name.encode("utf8"))
    feature_dict["image/source_id"] = tfrecord_util.bytes_feature(
        str(source_id).encode("utf8"))
    feature_dict["image/key/sha256"] = tfrecord_util.bytes_feature(
        key.encode("utf8"))
    feature_dict["image/encoded"] = tfrecord_util.bytes_feature(raw_bytes)
    feature_dict["image/format"] = tfrecord_util.bytes_feature(
        "jpeg".encode("utf8"))

    num_annotations_skipped = 0
    left, right, top, bottom = [], [], [], []
    crowd_flags, label_names, label_ids, areas = [], [], [], []

    # A missing or empty annotation list simply yields no iterations.
    for ann in bbox_annotations or ():
        x, y, box_w, box_h = ann["bbox"]
        # Reject degenerate boxes and boxes that extend past the right or
        # bottom edge of the image.
        if (box_w <= 0 or box_h <= 0
                or x + box_w > img_w or y + box_h > img_h):
            num_annotations_skipped += 1
            continue

        left.append(float(x) / img_w)
        right.append(float(x + box_w) / img_w)
        top.append(float(y) / img_h)
        bottom.append(float(y + box_h) / img_h)
        crowd_flags.append(ann["iscrowd"])

        label_id = int(ann["category_id"])
        label_ids.append(label_id)
        label_names.append(category_index[label_id]["name"].encode("utf8"))
        areas.append(ann["area"])

    feature_dict.update({
        "image/object/bbox/xmin": tfrecord_util.float_list_feature(left),
        "image/object/bbox/xmax": tfrecord_util.float_list_feature(right),
        "image/object/bbox/ymin": tfrecord_util.float_list_feature(top),
        "image/object/bbox/ymax": tfrecord_util.float_list_feature(bottom),
        "image/object/class/text":
            tfrecord_util.bytes_list_feature(label_names),
        "image/object/class/label":
            tfrecord_util.int64_list_feature(label_ids),
        "image/object/is_crowd":
            tfrecord_util.int64_list_feature(crowd_flags),
        "image/object/area": tfrecord_util.float_list_feature(areas),
    })

    if caption_annotations:
        captions = [
            entry["caption"].encode("utf8") for entry in caption_annotations
        ]
        feature_dict["image/caption"] = tfrecord_util.bytes_list_feature(
            captions)

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return key, example, num_annotations_skipped