def __create_tf_example(frame_data, sorted_label_list):
    """Build a tf.train.Example for one frame and summarize its labels.

    The frame's image bytes are decoded with PIL and re-encoded before being
    stored. Each bounding box coordinate is divided by the image size and
    clamped to [0, 1].

    Args:
        frame_data: Object exposing ``image`` (encoded image bytes),
            ``format`` (e.g. ``'jpg'`` or ``'png'``), ``filename`` and
            ``bboxes_text`` (parsed by
            ``bbox_writer.convert_text_to_rects_and_labels``).
        sorted_label_list: Sequence of label names; a label's position in
            this sequence is used as its integer class id.

    Returns:
        A tuple ``(tf_example, label_counter_for_frame, is_negative)``.
        ``label_counter_for_frame`` is a ``collections.Counter`` over the
        frame's labels and ``is_negative`` is True when the frame has no
        bounding boxes.

    Raises:
        KeyError: If a label of the frame is not in ``sorted_label_list``.
    """
    im = PIL.Image.open(io.BytesIO(frame_data.image))

    # PIL registers its JPEG encoder only under the name 'JPEG', so map every
    # spelling of the jpg/jpeg extension onto it regardless of case. Other
    # formats are passed through upper-cased, as before.
    if frame_data.format.lower() in ('jpg', 'jpeg'):
        pil_format = 'JPEG'
    else:
        pil_format = frame_data.format.upper()

    arr = io.BytesIO()
    im.save(arr, format=pil_format)
    height = im.height
    width = im.width
    encoded_image_data = arr.getvalue()

    rects, labels = bbox_writer.convert_text_to_rects_and_labels(
        frame_data.bboxes_text)

    def _normalized(value, extent):
        # Scale a pixel coordinate by the image extent and cap it to [0, 1].
        return max(min(value / extent, 1), 0)

    # One normalized coordinate per box.
    xmins = [_normalized(rect[0], width) for rect in rects]   # left x
    xmaxs = [_normalized(rect[2], width) for rect in rects]   # right x
    ymins = [_normalized(rect[1], height) for rect in rects]  # top y
    ymaxs = [_normalized(rect[3], height) for rect in rects]  # bottom y

    classes_txt = [label.encode('utf-8') for label in labels]  # String names
    label_to_id_dict = {
        label: i for i, label in enumerate(sorted_label_list)
    }
    class_ids = [label_to_id_dict[label] for label in labels]

    encoded_filename = frame_data.filename.encode('utf-8')
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_filename),
        'image/source_id': dataset_util.bytes_feature(encoded_filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format':
            dataset_util.bytes_feature(frame_data.format.encode('utf-8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_txt),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }))

    label_counter_for_frame = collections.Counter(labels)
    is_negative = len(rects) == 0
    return tf_example, label_counter_for_frame, is_negative
def create_tf_example(group, path):
    """Convert one image and its annotation rows into a tf.train.Example.

    Args:
        group: Object with a ``filename`` attribute and an ``object``
            attribute whose ``iterrows()`` yields rows holding ``xmin``,
            ``xmax``, ``ymin``, ``ymax`` and ``class``.
        path: Directory that contains the image file.

    Returns:
        The populated ``tf.train.Example``; box coordinates are divided by
        the image width/height read from the file.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    encoded_name = group.filename.encode('utf8')

    # One tuple per annotation row, evaluated in the same field order as the
    # features below: (xmin, xmax, ymin, ymax, class text, class id).
    boxes = [
        (
            row['xmin'] / width,
            row['xmax'] / width,
            row['ymin'] / height,
            row['ymax'] / height,
            row['class'].encode('utf8'),
            class_text_to_int(row['class']),
        )
        for _, row in group.object.iterrows()
    ]

    def column(position):
        return [box[position] for box in boxes]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(column(0)),
        'image/object/bbox/xmax': dataset_util.float_list_feature(column(1)),
        'image/object/bbox/ymin': dataset_util.float_list_feature(column(2)),
        'image/object/bbox/ymax': dataset_util.float_list_feature(column(3)),
        'image/object/class/text':
            dataset_util.bytes_list_feature(column(4)),
        'image/object/class/label':
            dataset_util.int64_list_feature(column(5)),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(example, height=600, width=800):
    """Convert one annotated image record into a tf.train.Example.

    Args:
        example: Mapping with ``'filename'`` (path of the image file) and
            ``'annotations'``, an iterable of boxes holding ``'xmin'``,
            ``'ymin'``, ``'x_width'``, ``'y_height'`` and ``'class'``.
        height: Image height used to normalize y coordinates. Defaults to
            600, the value previously hard-coded for the Udacity real data
            set.
        width: Image width used to normalize x coordinates. Defaults to 800.

    Returns:
        The populated ``tf.train.Example``.

    Raises:
        KeyError: If a box's class is missing from ``LABEL_DICT``.
    """
    image_path = example['filename']
    filename = image_path.encode()
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()
    image_format = 'jpg'.encode()

    xmins = []  # Normalized left x coordinates (1 per box)
    xmaxs = []  # Normalized right x coordinates (1 per box)
    ymins = []  # Normalized top y coordinates (1 per box)
    ymaxs = []  # Normalized bottom y coordinates (1 per box)
    classes_text = []  # String class names (1 per box)
    classes = []  # Integer class ids (1 per box)

    # Every annotation is exported; no filtering is applied here.
    for box in example['annotations']:
        # Convert to float BEFORE dividing so that integer pixel coordinates
        # are never truncated by integer division.
        xmins.append(float(box['xmin']) / width)
        xmaxs.append(float(box['xmin'] + box['x_width']) / width)
        ymins.append(float(box['ymin']) / height)
        ymaxs.append(float(box['ymin'] + box['y_height']) / height)
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def group_to_tf_record(point, image_directory):
    """Convert one annotated data point into a tf.train.Example.

    Args:
        point: Mapping with ``'id'`` (image id without extension) and
            ``'annotations'``, an iterable of mappings holding ``'x0'``,
            ``'x1'``, ``'y0'``, ``'y1'``, ``'class_num'`` and ``'label'``.
        image_directory: Directory containing the image files.

    Returns:
        The populated ``tf.train.Example``, or ``None`` when the image
        cannot be opened or read.
    """
    image_id = point['id']
    # Ids starting with 'frame' are stored as PNG, everything else as JPEG.
    if image_id.startswith('frame'):
        extension, image_format = '.png', b'png'
    else:
        extension, image_format = '.jpg', b'jpeg'
    filename = os.path.join(image_directory, image_id + extension)

    try:
        # The context manager releases the file handle PIL keeps open.
        with Image.open(filename) as image:
            width, height = image.size
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_image = bytes(fid.read())
    except Exception:
        # Best effort: unreadable images are skipped by returning None.
        # Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return None

    key = hashlib.sha256(encoded_image).hexdigest()

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    class_nums = []   # integer class ids, one per box
    class_texts = []  # utf-8 encoded label names, one per box
    for anno in point['annotations']:
        # Coordinates are stored as given; no normalization is applied here.
        xmins.append(float(anno['x0']))
        xmaxs.append(float(anno['x1']))
        ymins.append(float(anno['y0']))
        ymaxs.append(float(anno['y1']))
        class_nums.append(anno['class_num'])
        class_texts.append(bytes(anno['label'].encode('utf8')))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/filename':
            dataset_util.bytes_feature(bytes(filename.encode('utf8'))),
        'image/source_id':
            dataset_util.bytes_feature(bytes(image_id.encode('utf8'))),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_texts),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_nums),
    }))
    return tf_example
def create_tfdatapoint(file_loc, file, labels):
    """Build a tf.train.Example from an image and its text label file.

    The image is read from ``<file_loc>/images/<file>`` and the labels from
    ``<file_loc>/labels/<stem>.txt``, where ``<stem>`` is ``file`` without
    its extension. Each label row holds five numbers: class index, box
    center x, box center y, box width, box height. Boxes are converted to
    min/max corners as center -/+ half the size.

    Args:
        file_loc: Dataset root containing ``images`` and ``labels``
            sub-directories.
        file: Image file name inside ``images``.
        labels: Indexable collection mapping a class index to its name.

    Returns:
        The populated ``tf.train.Example``.
    """
    image_path = os.path.join(file_loc, 'images', file)

    # Close both handles deterministically instead of leaking them.
    with Image.open(image_path) as img:
        (width, height) = img.size
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded = bytes(fid.read())
    image_format = b'png'

    # splitext drops only the final extension, so names that contain extra
    # dots (e.g. 'scene.001.png') still resolve to the matching label file.
    filename = os.path.splitext(file)[0]

    data = np.genfromtxt(os.path.join(file_loc, 'labels', filename + '.txt'))
    # A single-row file is loaded as a 1-D array; force (num_boxes, 5).
    data = data.reshape(data.size // 5, 5)

    classes = [int(x) for x in data[:, 0]]
    classes_text = [labels[x].encode('utf8') for x in classes]

    # NOTE(review): values are written as-is; presumably the label file
    # already stores coordinates normalized to [0, 1] — confirm.
    half_widths = data[:, 3] / 2.0
    half_heights = data[:, 4] / 2.0
    xmins = data[:, 1] - half_widths
    xmaxs = data[:, 1] + half_widths
    ymins = data[:, 2] - half_heights
    ymaxs = data[:, 2] + half_heights

    tf_label_and_data = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(str.encode(filename)),
        'image/source_id': dataset_util.bytes_feature(str.encode(filename)),
        'image/encoded': dataset_util.bytes_feature(encoded),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    return tf_label_and_data
def create_tf_example(filename):
    """Build a single-box tf.train.Example from a self-describing file name.

    The bounding box is parsed from the file name itself: the base name
    (without extension) is split on ``'-'`` and its third field must look
    like ``'<xmin>&<ymin>_<xmax>&<ymax>'`` in pixels.

    Args:
        filename: Path of the image file; ``'/'`` is treated as the
            directory separator.

    Returns:
        A ``tf.train.Example`` holding the image bytes and exactly one box
        labelled ``'vehicle plate'`` with class id 2. No ``image/source_id``
        or ``image/key/sha256`` feature is written.

    Raises:
        IndexError: If the base name has fewer than three ``'-'`` fields.
        ValueError: If the coordinate field is malformed.
    """
    stem = filename.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    coordinates = stem.split('-')[2]
    (xmin, ymin), (xmax, ymax) = [
        [int(value) for value in corner.split('&')]
        for corner in coordinates.split('_')
    ]

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    height = image.height
    width = image.width

    # Pixel coordinates normalized by the image size.
    xmins = [float(xmin) / width]
    xmaxs = [float(xmax) / width]
    ymins = [float(ymin) / height]
    ymaxs = [float(ymax) / height]
    labels_text = ['vehicle plate'.encode('utf8')]
    labels = [2]

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(labels_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(labels),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def dict_to_coco_example(img_data):
    """Convert the dictionary describing one image into a tf.Example proto.

    Args:
        img_data: Information about one image. Keys read here: ``'bboxes'``,
            ``'labels'``, ``'text'``, ``'height'``, ``'width'``,
            ``'pixel_data'`` (encoded image bytes) and ``'file'``.

    Returns:
        example: The converted tf.Example.
    """
    boxes = list(img_data['bboxes'])

    # NOTE(review): the min corner is read from indices 2/3 and the max
    # corner from indices 0/1, i.e. this assumes each bbox is laid out as
    # [xmax, ymax, xmin, ymin] — confirm against the code that fills
    # img_data['bboxes'].
    xmin = [box[2] for box in boxes]
    xmax = [box[0] for box in boxes]
    ymin = [box[3] for box in boxes]
    ymax = [box[1] for box in boxes]

    feature = {
        'image/height': dataset_util.int64_feature(img_data['height']),
        'image/width': dataset_util.int64_feature(img_data['width']),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
            dataset_util.int64_list_feature(img_data['labels']),
        'image/object/class/text':
            dataset_util.bytes_list_feature(img_data['text']),
        'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        'image/object/class/file':
            dataset_util.bytes_feature(img_data['file'].encode('utf-8')),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(height, width, filename, encoded_image_data,
                      image_format, xmins, xmaxs, ymins, ymaxs, classes_text,
                      classes):
    """Pack already-prepared image and box data into a tf.train.Example.

    Args:
        height: Image height.
        width: Image width.
        filename: Filename of the image; also stored as the source id.
        encoded_image_data: Encoded image bytes.
        image_format: ``b'jpeg'`` or ``b'png'``.
        xmins: Normalized left x coordinates, one per box.
        xmaxs: Normalized right x coordinates, one per box.
        ymins: Normalized top y coordinates, one per box.
        ymaxs: Normalized bottom y coordinates, one per box.
        classes_text: String class names, one per box.
        classes: Integer class ids, one per box.

    Returns:
        The populated ``tf.train.Example``.
    """
    image_features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
    }
    box_features = {
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }

    feature = dict(image_features)
    feature.update(box_features)
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(csv, img_dir):
    """Convert one CSV record describing a single box into a tf.train.Example.

    Args:
        csv: Sequence ``[image file name, x1, y1, x2, y2, class index]``;
            the four coordinates and the class index are parsed with
            ``int``.
        img_dir: Directory that contains the image file.

    Returns:
        The populated ``tf.train.Example`` with exactly one box whose
        coordinates are divided by the image width/height.
    """
    img_fname = csv[0]
    left, top, right, bottom = [int(value) for value in csv[1:-1]]
    class_index = int(csv[-1])
    class_name = config.CLASS_NAMES[class_index].encode('utf8')

    with tf.gfile.GFile(os.path.join(img_dir, img_fname), 'rb') as fid:
        encoded_jpg = fid.read()
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    encoded_name = img_fname.encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature([left / width]),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature([right / width]),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature([top / height]),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature([bottom / height]),
        'image/object/class/text':
            dataset_util.bytes_list_feature([class_name]),
        'image/object/class/label':
            dataset_util.int64_list_feature([class_index]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(group, path):
    """Convert one image and its annotation rows into a tf.train.Example.

    Args:
        group: Object with ``label`` and ``filename`` attributes (the image
            is read from ``<path>/<label>/<filename>``) and an ``object``
            attribute whose ``iterrows()`` yields rows holding ``xmin``,
            ``xmax``, ``ymin``, ``ymax`` and ``class``.
        path: Dataset root directory; also passed to ``class_img_dict`` to
            obtain the mapping from class name to numeric label.

    Returns:
        The populated ``tf.train.Example``; box coordinates are divided by
        the image width/height read from the file.
    """
    # Mapping from class name to numeric label.
    class_dict = class_img_dict(path)

    # Read the raw image bytes.
    image_path = os.path.join(
        path, '{}/{}'.format(group.label, group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # The image is opened only to read its size.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    encoded_name = group.filename.encode('utf8')

    # One tuple per annotation row, evaluated in the same field order as the
    # features below: (xmin, xmax, ymin, ymax, class text, class id).
    boxes = [
        (
            row['xmin'] / width,
            row['xmax'] / width,
            row['ymin'] / height,
            row['ymax'] / height,
            row['class'].encode('utf8'),
            class_dict[row['class']],
        )
        for _, row in group.object.iterrows()
    ]

    def column(position):
        return [box[position] for box in boxes]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(column(0)),
        'image/object/bbox/xmax': dataset_util.float_list_feature(column(1)),
        'image/object/bbox/ymin': dataset_util.float_list_feature(column(2)),
        'image/object/bbox/ymax': dataset_util.float_list_feature(column(3)),
        'image/object/class/text':
            dataset_util.bytes_list_feature(column(4)),
        'image/object/class/label':
            dataset_util.int64_list_feature(column(5)),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(example):
    """Build a tf.train.Example for one event-data sample.

    The ``image/indices*``, ``image/values`` and ``image/shape`` features
    are NOT derived from the sample: they are filled with freshly generated
    random data and a fixed shape on every call.

    Args:
        example: 5-tuple ``(obj_type, fileidx, annotations, data, sh)``.
            ``annotations[1..4]`` are written as ymin, xmin, ymax, xmax;
            ``data`` is stored as the encoded image bytes; the fifth
            element is ignored.

    Returns:
        The populated ``tf.train.Example``.
    """
    obj_type, fileidx, annotations, data, _ = example

    # TODO(user): Populate the following variables from your example.
    height, width = 240, 304
    # Filename of the image.
    filename = str.encode(fileidx + 'npy.gz')
    image_format = b'vEvent'

    # Random coordinates inside a 304 x 240 x 2 volume.
    random_coords = np.int64(np.random.random((1000, 3)) * [304, 240, 2])
    indices0, indices1, indices2 = random_coords.T
    indices = random_coords.flatten().tolist()
    sh = np.array([304, 240, 2]).astype(np.int64)
    random_values = np.random.random(len(indices)) * 256
    values = random_values.astype(np.float32).flatten().tolist()

    # Box coordinates are taken from the annotation as-is (1 box per sample).
    feature = {
        'image/indices': dataset_util.int64_list_feature(indices),
        'image/indices0': dataset_util.int64_list_feature(indices0),
        'image/indices1': dataset_util.int64_list_feature(indices1),
        'image/indices2': dataset_util.int64_list_feature(indices2),
        'image/values': dataset_util.float_list_feature(values),
        'image/shape': dataset_util.int64_list_feature(sh),
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature([annotations[2]]),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature([annotations[4]]),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature([annotations[1]]),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature([annotations[3]]),
        'image/object/class/text':
            dataset_util.bytes_list_feature([str.encode(obj_type)]),
        'image/object/class/label':
            dataset_util.int64_list_feature([classesid[obj_type]]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False,
                      keypoint_annotations_dict=None,
                      densepose_annotations_dict=None,
                      remove_non_person_annotations=False,
                      remove_non_person_images=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url',
        u'height', u'width', u'date_captured', u'flickr_url', u'id']
      annotations_list: list of dicts with keys: [u'segmentation', u'area',
        u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that
        bounding box coordinates in the official COCO dataset are given as
        [x, y, width, height] tuples using absolute coordinates where x, y
        represent the top-left (0-indexed) corner. This function converts to
        the format expected by the Tensorflow Object Detection API (which is
        [ymin, xmin, ymax, xmax] with coordinates normalized relative to
        image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed by
        the 'id' field of each category. See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks (PNG
        encoded) in the result. default: False.
      keypoint_annotations_dict: A dictionary that maps from annotation_id
        to a dictionary with keys: [u'keypoints', u'num_keypoints']
        representing the keypoint information for this person object
        annotation. If None, then no keypoint annotations will be populated.
      densepose_annotations_dict: A dictionary that maps from annotation_id
        to a dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U',
        'dp_V'] representing part surface coordinates. For more information
        see http://densepose.org/.
      remove_non_person_annotations: Whether to remove any annotations that
        are not the "person" class.
      remove_non_person_images: Whether to remove any images that do not
        contain at least one "person" annotation.

    Returns:
      key: SHA256 hash of the image.
      example: The converted tf.Example, or None when
        remove_non_person_images is set and no kept annotation is a
        "person".
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored.
      num_keypoint_annotation_skipped: Number of keypoint annotations that
        were skipped. Stays 0 when keypoints are disabled or when the
        function returns early with example=None.
      num_densepose_annotation_skipped: Number of DensePose annotations that
        were skipped. Stays 0 when DensePose is disabled or when the
        function returns early with example=None.

    NOTE(review): no explicit JPEG validation is performed here; the bytes
    are only handed to PIL.Image.open — confirm what callers should expect
    for unreadable images.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    # Rebinds the `image` parameter; the opened image is not used afterwards.
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Per-box accumulators (one entry per kept annotation).
    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    # Keypoint accumulators; x/y/visibility/name are flattened across boxes.
    keypoints_x = []
    keypoints_y = []
    keypoints_visibility = []
    keypoints_name = []
    num_keypoints = []
    include_keypoint = keypoint_annotations_dict is not None
    num_annotations_skipped = 0
    num_keypoint_annotation_used = 0
    num_keypoint_annotation_skipped = 0
    # DensePose accumulators; point data is flattened across boxes.
    dp_part_index = []
    dp_x = []
    dp_y = []
    dp_u = []
    dp_v = []
    dp_num_points = []
    # A DensePose annotation is only used when all of these keys are present.
    densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
    include_densepose = densepose_annotations_dict is not None
    num_densepose_annotation_used = 0
    num_densepose_annotation_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Skip degenerate boxes.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        # Skip boxes extending past the right/bottom image border.
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        category_id = int(object_annotations['category_id'])
        category_name = category_index[category_id]['name'].encode('utf8')
        if remove_non_person_annotations and category_name != b'person':
            num_annotations_skipped += 1
            continue
        # Convert [x, y, width, height] to normalized corner coordinates.
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(category_name)
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            # For non-crowd objects the decoded mask is reduced along axis 2.
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

        if include_keypoint:
            annotation_id = object_annotations['id']
            if annotation_id in keypoint_annotations_dict:
                num_keypoint_annotation_used += 1
                keypoint_annotations = keypoint_annotations_dict[annotation_id]
                # `keypoints` is a flat list read in strides of 3:
                # x, y, visibility.
                keypoints = keypoint_annotations['keypoints']
                num_kpts = keypoint_annotations['num_keypoints']
                keypoints_x_abs = keypoints[::3]
                keypoints_x.extend(
                    [float(x_abs) / image_width for x_abs in keypoints_x_abs])
                keypoints_y_abs = keypoints[1::3]
                keypoints_y.extend(
                    [float(y_abs) / image_height for y_abs in keypoints_y_abs])
                keypoints_visibility.extend(keypoints[2::3])
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(num_kpts)
            else:
                # Pad with zeros so every box contributes the same number of
                # keypoint entries.
                keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
                keypoints_name.extend(_COCO_KEYPOINT_NAMES)
                num_keypoints.append(0)

        if include_densepose:
            annotation_id = object_annotations['id']
            if (annotation_id in densepose_annotations_dict and
                    all(key in densepose_annotations_dict[annotation_id]
                        for key in densepose_keys)):
                dp_annotations = densepose_annotations_dict[annotation_id]
                num_densepose_annotation_used += 1
                dp_num_points.append(len(dp_annotations['dp_I']))
                dp_part_index.extend([
                    int(i - _DP_PART_ID_OFFSET)
                    for i in dp_annotations['dp_I']
                ])
                # DensePose surface coordinates are defined on a [256, 256]
                # grid relative to each instance box (i.e. absolute
                # coordinates in range [0., 256.]). The following converts the
                # coordinates so that they are expressed in normalized image
                # coordinates.
                dp_x_box_rel = [
                    clip_to_unit(val / 256.) for val in dp_annotations['dp_x']
                ]
                dp_x_norm = [(float(x) + x_box_rel * width) / image_width
                             for x_box_rel in dp_x_box_rel]
                dp_y_box_rel = [
                    clip_to_unit(val / 256.) for val in dp_annotations['dp_y']
                ]
                dp_y_norm = [(float(y) + y_box_rel * height) / image_height
                             for y_box_rel in dp_y_box_rel]
                dp_x.extend(dp_x_norm)
                dp_y.extend(dp_y_norm)
                dp_u.extend(dp_annotations['dp_U'])
                dp_v.extend(dp_annotations['dp_V'])
            else:
                dp_num_points.append(0)

    # Early exit: the two "skipped" counters below are still at their initial
    # value of 0 here because they are only computed further down.
    if (remove_non_person_images and
            not any(name == b'person' for name in category_names)):
        return (key, None, num_annotations_skipped,
                num_keypoint_annotation_skipped,
                num_densepose_annotation_skipped)
    # Note: category_ids is collected above but no class/label feature is
    # written; only the class text is stored.
    feature_dict = {
        'image/height':
            dataset_util.int64_feature(image_height),
        'image/width':
            dataset_util.int64_feature(image_width),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
        'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
        'image/object/area':
            dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    if include_keypoint:
        feature_dict['image/object/keypoint/x'] = (
            dataset_util.float_list_feature(keypoints_x))
        feature_dict['image/object/keypoint/y'] = (
            dataset_util.float_list_feature(keypoints_y))
        feature_dict['image/object/keypoint/num'] = (
            dataset_util.int64_list_feature(num_keypoints))
        feature_dict['image/object/keypoint/visibility'] = (
            dataset_util.int64_list_feature(keypoints_visibility))
        feature_dict['image/object/keypoint/text'] = (
            dataset_util.bytes_list_feature(keypoints_name))
        # Entries of the keypoint dict that were not matched to a kept
        # annotation of this image.
        num_keypoint_annotation_skipped = (len(keypoint_annotations_dict) -
                                           num_keypoint_annotation_used)
    if include_densepose:
        feature_dict['image/object/densepose/num'] = (
            dataset_util.int64_list_feature(dp_num_points))
        feature_dict['image/object/densepose/part_index'] = (
            dataset_util.int64_list_feature(dp_part_index))
        feature_dict['image/object/densepose/x'] = (
            dataset_util.float_list_feature(dp_x))
        feature_dict['image/object/densepose/y'] = (
            dataset_util.float_list_feature(dp_y))
        feature_dict['image/object/densepose/u'] = (
            dataset_util.float_list_feature(dp_u))
        feature_dict['image/object/densepose/v'] = (
            dataset_util.float_list_feature(dp_v))
        # Entries of the DensePose dict that were not used for this image.
        num_densepose_annotation_skipped = (len(densepose_annotations_dict) -
                                            num_densepose_annotation_used)

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return (key, example, num_annotations_skipped,
            num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
def create_tf_example(image, annotations_list, image_dir, category_index,
                      include_masks=False):
    """Converts one COCO image and its annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url',
        u'height', u'width', u'date_captured', u'flickr_url', u'id']
      annotations_list: list of dicts with keys: [u'segmentation', u'area',
        u'iscrowd', u'image_id', u'bbox', u'category_id', u'id']. Boxes are
        [x, y, width, height] in absolute coordinates with (x, y) the
        top-left corner; they are converted here to min/max corners
        normalized by the image size.
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed by
        the 'id' field of each category.
      include_masks: Whether to include instance segmentation masks (PNG
        encoded) in the result. default: False.

    Returns:
      key: SHA256 hex digest of the image file contents.
      example: The converted tf.Example.
      num_annotations_skipped: Number of annotations ignored because their
        box is degenerate or extends past the image border.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    with tf.gfile.GFile(os.path.join(image_dir, filename), 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin, xmax, ymin, ymax = [], [], [], []
    is_crowd, category_names, category_ids, area = [], [], [], []
    encoded_mask_png = []
    num_annotations_skipped = 0

    for annotation in annotations_list:
        x, y, box_width, box_height = tuple(annotation['bbox'])
        # Ignore degenerate boxes and boxes that leave the image.
        if (box_width <= 0 or box_height <= 0
                or x + box_width > image_width
                or y + box_height > image_height):
            num_annotations_skipped += 1
            continue

        xmin.append(float(x) / image_width)
        xmax.append(float(x + box_width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + box_height) / image_height)
        is_crowd.append(annotation['iscrowd'])
        category_id = int(annotation['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(annotation['area'])

        if not include_masks:
            continue

        run_len_encoding = mask.frPyObjects(
            annotation['segmentation'], image_height, image_width)
        binary_mask = mask.decode(run_len_encoding)
        # For non-crowd objects the decoded mask is reduced along axis 2.
        if not annotation['iscrowd']:
            binary_mask = np.amax(binary_mask, axis=2)
        png_buffer = io.BytesIO()
        PIL.Image.fromarray(binary_mask).save(png_buffer, format='PNG')
        encoded_mask_png.append(png_buffer.getvalue())

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return key, example, num_annotations_skipped
def create_tf_example(filename, label_file):
    """Build a tf.train.Example for one image and its bounding-box label file.

    Args:
        filename: Path of the image file. It is read with OpenCV to obtain
            the image dimensions and as raw bytes for 'image/encoded'.
        label_file: Path of a space-delimited text file. The first row is
            skipped as a header. In every other row, columns 0-3 are read as
            xmin, ymin, xmax, ymax in pixels and the last column as the
            class name.

    Returns:
        A tf.train.Example holding the image size, file name, encoded bytes,
        normalized box coordinates, class names and class ids.
    """

    def _as_bytes(text):
        # tf.train.BytesList only accepts bytes; encode text on Python 3.
        return text if isinstance(text, bytes) else text.encode('utf8')

    img = cv2.imread(filename)
    height, width = img.shape[:2]
    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image_data = fid.read()
    image_format = b'jpg'

    xmins = []  # Normalized left x coordinates (1 per box)
    xmaxs = []  # Normalized right x coordinates (1 per box)
    ymins = []  # Normalized top y coordinates (1 per box)
    ymaxs = []  # Normalized bottom y coordinates (1 per box)
    classes_text = []  # Class names as bytes (1 per box)
    classes = []  # Integer class ids (1 per box)

    with open(label_file, 'r') as f:
        rows = csv.reader(f, delimiter=' ')
        next(rows, None)  # The first row is a header.
        for row in rows:
            if not row:
                # Blank lines (e.g. a trailing newline) carry no box.
                continue
            name = row[-1]
            classes_text.append(_as_bytes(name))
            classes.append(get_index(name))
            xmins.append(float(row[0]) / width)
            xmaxs.append(float(row[2]) / width)
            ymins.append(float(row[1]) / height)
            ymaxs.append(float(row[3]) / height)

    encoded_filename = _as_bytes(filename)
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(encoded_filename),
            'image/source_id': dataset_util.bytes_feature(encoded_filename),
            'image/encoded': dataset_util.bytes_feature(encoded_image_data),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def background_tf_example(image_path):
    """Create a tf.Example for an image that has no object annotations.

    Args:
      image_path: Full path to image file

    Returns:
      example: The converted tf.Example. All per-object features (boxes,
        classes, difficult, truncated, view) are written as empty lists.

    Raises:
      ValueError: if PIL does not report the file's format as JPEG
    """
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        jpeg_bytes = image_file.read()

    decoded = PIL.Image.open(io.BytesIO(jpeg_bytes))
    if decoded.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    digest = hashlib.sha256(jpeg_bytes).hexdigest()
    # The file name is whatever follows the last '/' of the path.
    base_name = image_path.split('/')[-1].encode('utf8')

    features = {
        'image/height': dataset_util.int64_feature(decoded.height),
        'image/width': dataset_util.int64_feature(decoded.width),
        'image/filename': dataset_util.bytes_feature(base_name),
        'image/source_id': dataset_util.bytes_feature(base_name),
        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        # A background image contributes no boxes and no labels.
        'image/object/bbox/xmin': dataset_util.float_list_feature([]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([]),
        'image/object/class/text': dataset_util.bytes_list_feature([]),
        'image/object/class/label': dataset_util.int64_list_feature([]),
        'image/object/difficult': dataset_util.int64_list_feature([]),
        'image/object/truncated': dataset_util.int64_list_feature([]),
        'image/object/view': dataset_util.bytes_list_feature([]),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def xml_to_tf(path_input, path_output):
    """Convert every ``.xml`` annotation in a directory into one TFRecord file.

    For each ``.xml`` file in ``path_input`` the annotation is parsed, the
    image it names is read from the same directory, and one tf.train.Example
    is appended to the TFRecord at ``path_output``.

    Args:
        path_input: Directory holding the ``.xml`` files and their images.
        path_output: Path of the TFRecord file to write.

    Returns:
        None. Progress is reported on stdout.
    """
    print(path_output)
    # The context manager closes the writer even if a file fails to convert.
    with tf.io.TFRecordWriter(path_output) as writer:
        for file in os.listdir(path_input):
            if not file.endswith(".xml"):
                continue
            # os.path.join keeps this working when path_input has no
            # trailing separator, matching how the image path is built below.
            root = ET.parse(os.path.join(path_input, file)).getroot()
            # NOTE(review): fields are read by child position (root[1] is
            # taken as the file name, root[4] as the size element and
            # member[4] as the bounding box) - confirm this matches the
            # layout of the annotation files.
            filename = root[1].text
            width = int(root[4][0].text)
            height = int(root[4][1].text)

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []
            classes = []
            for member in root.findall('object'):
                label = member[0].text
                xmin = int(member[4][0].text)
                ymin = int(member[4][1].text)
                xmax = int(member[4][2].text)
                ymax = int(member[4][3].text)
                xmins.append(xmin / width)
                xmaxs.append(xmax / width)
                ymins.append(ymin / height)
                ymaxs.append(ymax / height)
                classes_text.append(label.encode('utf8'))
                classes.append(class_text_to_int(label))

            with tf.io.gfile.GFile(
                    os.path.join(path_input, '{}'.format(filename)),
                    'rb') as fid:
                encoded_jpg = fid.read()

            encoded_filename = filename.encode('utf8')
            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename':
                        dataset_util.bytes_feature(encoded_filename),
                    'image/source_id':
                        dataset_util.bytes_feature(encoded_filename),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature(IMAGE_FORMAT),
                    'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text':
                        dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                }))
            writer.write(tf_example.SerializeToString())

    output_path = os.path.join(os.getcwd(), path_output)
    print('Successfully created the TFRecords: {}'.format(output_path))
def createTFExample(self):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data. As a side effect it also saves a copy of the
    image with the accepted boxes drawn on it.

    Reads ``self.xml`` (annotation path), ``self.jpg`` (image path) and
    ``self.crop``; when ``self.crop`` is not the empty string the image path
    comes from ``self.__cropImage(data)`` instead of ``self.jpg``.

    Args:
        None

    Returns:
        example: The converted tf.Example.
    """
    with tf.io.gfile.GFile(self.xml, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

    # the image might be processed in a different location
    # so overwrite the path to the input image path for consistency
    data['path'] = self.jpg if self.crop == '' else self.__cropImage(data)
    print(f"Processing image {data['path']}")

    width = int(data['size']['width'])
    height = int(data['size']['height'])
    filename = data['filename'].encode('utf8')
    with tf.io.gfile.GFile(data['path'], 'rb') as fid:
        encoded_image_data = fid.read()
    image_format = 'jpeg'.encode('utf8')

    xmins = []  # Normalized left x coordinates (1 per box)
    xmaxs = []  # Normalized right x coordinates (1 per box)
    ymins = []  # Normalized top y coordinates (1 per box)
    ymaxs = []  # Normalized bottom y coordinates (1 per box)
    classes_text = []  # Class names as bytes (1 per box)
    classes_id = []  # Integer class ids (1 per box)

    image = util.loadImage(data['path'])
    # The parsed dict has no 'object' key when the annotation contains no
    # objects; treat that as an image without boxes instead of a KeyError.
    for obj in data.get('object', []):
        if obj['name'] not in classes or not self.__isValidBox(
                obj, width, height):
            print('Unexpected object: ' + str(obj) + ' in ' + data['path'])
            continue
        xmins.append(float(obj['bndbox']['xmin']) / width)
        ymins.append(float(obj['bndbox']['ymin']) / height)
        xmaxs.append(float(obj['bndbox']['xmax']) / width)
        ymaxs.append(float(obj['bndbox']['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes_id.append(getClassID(obj['name']))
        util.drawBox(image, self.__encodeBox(obj['bndbox']))
    # NOTE(review): if the path does not contain ".jpg" the replace is a
    # no-op and the annotated copy is saved over the source image - confirm
    # inputs always use the ".jpg" suffix.
    util.saveImage(image,
                   str(data['path']).replace(".jpg", "-with-boxes.jpg"))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_image_data),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes_id),
        }))
    return tf_example
def toTfrecord(f, pathTofile, min_face_width=25.0, min_face_height=30.0):
    """Read one image entry from an annotation stream and build a tf.Example.

    Consumes from ``f``, in order: one line with the image file name, one
    line with the number of faces, then that many annotation lines whose
    first four whitespace-separated fields are read as x, y, width, height
    in pixels.

    Args:
        f: Open text file positioned at the start of an image entry.
        pathTofile: Directory that the image file name is relative to.
        min_face_width: A box is kept only if its width is strictly greater
            than this value (pixels).
        min_face_height: A box is kept only if its height is strictly
            greater than this value (pixels).

    Returns:
        A tf.train.Example with the encoded image and the kept boxes, whose
        normalized coordinates are clamped to [0.005, 0.995].

    Raises:
        ValueError: if OpenCV cannot decode the image file.
    """
    xmins = []  # Normalized left x coordinates (1 per box)
    xmaxs = []  # Normalized right x coordinates (1 per box)
    ymins = []  # Normalized top y coordinates (1 per box)
    ymaxs = []  # Normalized bottom y coordinates (1 per box)
    classes_text = []  # Class names as bytes (1 per box)
    classes = []  # Integer class ids (1 per box)

    filename = f.readline().rstrip()
    print(filename)
    full_path = os.path.join(pathTofile, filename)
    print(full_path)

    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Kept from the original flow: PIL raises here if the bytes are not a
    # recognizable image.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    image_raw = cv2.imread(full_path)
    if image_raw is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError('Could not decode image: ' + full_path)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    height, width, channel = image_raw.shape
    print("height is %d, width is %d, channel is %d" %
          (height, width, channel))

    # NOTE(review): some WIDER FACE annotation files are reported to put a
    # placeholder box line after a face count of 0; if that applies here
    # the line is left unread and becomes the next "file name" - confirm.
    face_num = int(f.readline().rstrip())
    print("face_num:>>", face_num)

    for _ in range(face_num):
        annot = f.readline().rstrip().split()
        # Small faces are dropped; the size fields are checked first so a
        # rejected line is parsed no further than before.
        if not float(annot[2]) > min_face_width:
            continue
        if not float(annot[3]) > min_face_height:
            continue
        left, top, box_w, box_h = (float(v) for v in annot[:4])
        # WIDER FACE contains some annotations that exceed the image
        # boundary, so the normalized coordinates are clamped.
        xmins.append(max(0.005, left / width))
        ymins.append(max(0.005, top / height))
        xmaxs.append(min(0.995, (left + box_w) / width))
        ymaxs.append(min(0.995, (top + box_h) / height))
        classes_text.append("face".encode('utf8'))
        # NOTE(review): the TF Object Detection API normally reserves label
        # id 0 for background - confirm that 0 is intended for "face".
        classes.append(0)
        print(xmins[-1], ymins[-1], xmaxs[-1], ymaxs[-1], classes_text[-1],
              classes[-1])

    encoded_filename = filename.encode('utf8')
    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(encoded_filename),
        'image/source_id': dataset_util.bytes_feature(encoded_filename),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(value=classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    print("xxxxx", xmins)
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def prepare_example(image_path, annotations, label_map_dict):
    """Converts an image and its 2D box annotations to a tf.Example proto.

    Only the image size, file name, encoded bytes, normalized 2D boxes and
    integer class labels are written; no other annotation field is stored.

    Args:
      image_path: The complete path to image.
      annotations: A dictionary of annotation fields. The keys read here are
        '2d_bbox_left', '2d_bbox_top', '2d_bbox_right', '2d_bbox_bottom'
        and 'type'. The box entries are divided by the image size as a
        whole and 'type' is iterated per object (presumably one array
        element per object - verify against the caller).
      label_map_dict: A map from string label names to integer ids.

    Returns:
      example: The converted tf.Example.
    """
    with tf.gfile.GFile(image_path, 'rb') as png_file:
        encoded_png = png_file.read()

    # Decode the image to an array to read its dimensions from the shape.
    pixels = np.asarray(pil.open(io.BytesIO(encoded_png)))
    height = int(pixels.shape[0])
    width = int(pixels.shape[1])

    # Pixel coordinates -> fractions of the image size.
    left = annotations['2d_bbox_left'] / float(width)
    top = annotations['2d_bbox_top'] / float(height)
    right = annotations['2d_bbox_right'] / float(width)
    bottom = annotations['2d_bbox_bottom'] / float(height)

    label_ids = [label_map_dict[name] for name in annotations['type']]

    features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def dict_to_tf_example(data,
                       dataset_directory,
                       ignore_difficult_instances=False,
                       image_subdirectory='All_Images'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data. Every kept object is written with class id 1
    and class text 'ferrari', whatever name the annotation carries.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      dataset_directory: Path to root directory holding the dataset. The
        image is read from <dataset_directory>/SSD_Training_Data/All_Images.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).
      image_subdirectory: Currently unused; the image sub-directory is
        hard-coded (see dataset_directory).

    Returns:
      A tuple (example, boxes, small_boxes_count): the converted tf.Example,
      the kept boxes as [xmin, ymin, xmax, ymax] lists of normalized
      coordinates, and the number of boxes skipped for being too small.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a
        valid JPEG
    """
    full_path = os.path.join(dataset_directory, 'SSD_Training_Data',
                             'All_Images', data['filename'])
    full_path = full_path.replace('_mp4', '.mp4')
    if '.jpg' not in full_path:
        full_path = full_path + '.jpg'

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    difficult_obj = []
    boxes = []
    small_boxes_count = 0

    # The original name check compared a lower-cased name with 'Other', which
    # can never match, and both branches assigned 1: one class id for all.
    class_id = 1
    # Boxes whose shorter normalized side is below this value are skipped.
    min_box_side = 0.008

    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            xmin_norm = float(obj['bndbox']['xmin']) / width
            ymin_norm = float(obj['bndbox']['ymin']) / height
            xmax_norm = float(obj['bndbox']['xmax']) / width
            ymax_norm = float(obj['bndbox']['ymax']) / height

            if min(xmax_norm - xmin_norm,
                   ymax_norm - ymin_norm) < min_box_side:
                small_boxes_count += 1
                continue

            difficult_obj.append(int(difficult))
            xmin.append(xmin_norm)
            ymin.append(ymin_norm)
            xmax.append(xmax_norm)
            ymax.append(ymax_norm)
            boxes.append([xmin_norm, ymin_norm, xmax_norm, ymax_norm])
            classes_text.append('ferrari'.encode('utf8'))
            classes.append(class_id)
            truncated.append(int(obj['truncated']))

    encoded_filename = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(encoded_filename),
            'image/source_id': dataset_util.bytes_feature(encoded_filename),
            'image/key/sha256':
                dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            'image/object/difficult':
                dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
                dataset_util.int64_list_feature(truncated),
        }))
    return example, boxes, small_boxes_count
def read_xml_make_tfrecord(show_mask=False):
    """Write sharded train/valid/test TFRecords from images and their masks.

    Every ``.jpg`` in the module-level ``folder`` is paired with the mask of
    the same name under ``mask/``. The bounding box is the extent of the
    contour with the most points in the mask, and the instance mask is the
    set of mask pixels equal to 255, PNG-encoded. Examples are routed by
    their position in the directory listing: the first 80% to 8 train
    shards, the next 10% to the valid shard and the rest to the test shard.

    Args:
        show_mask: If True, display each cleaned-up mask in an OpenCV window
            and wait for a key press. Off by default because the wait blocks
            once per image and needs a display.

    Returns:
        None. Shards are written under ``tfrecord/train``, ``tfrecord/valid``
        and ``tfrecord/test``.
    """
    num_data = 8
    num_eval = int(num_data / 8)
    train_writers = {}
    valid_writers = {}
    test_writers = {}
    opened = []  # Every writer created so far, so all of them get closed.

    def _open_writer(path):
        writer = tensorflow.io.TFRecordWriter(path)
        opened.append(writer)
        return writer

    try:
        for i in range(num_data):
            train_writers[i] = _open_writer(
                'tfrecord/train/train.tfrecord-{:05d}-of-{:05d}'.format(
                    i, num_data))
        for i in range(num_eval):
            test_writers[i] = _open_writer(
                'tfrecord/test/test.tfrecord-{:05d}-of-{:05d}'.format(
                    i, num_eval))
            valid_writers[i] = _open_writer(
                'tfrecord/valid/valid.tfrecord-{:05d}-of-{:05d}'.format(
                    i, num_eval))

        image_names = os.listdir(folder)
        length = len(image_names)
        for number, img_name in enumerate(image_names):
            if img_name[-4:] != '.jpg':
                continue
            filename = img_name[:-4]
            image_path = folder + filename + ".jpg"
            mask_path = 'mask/' + filename + '.jpg'

            img = cv2.imread(image_path)
            height, width = img.shape[:2]

            contour_mask = cv2.imread(mask_path, 0)
            # Morphological closing with a 5x5 kernel before contour search.
            contour_mask = cv2.morphologyEx(contour_mask, cv2.MORPH_CLOSE,
                                            np.ones((5, 5), np.uint8))
            if show_mask:
                cv2.imshow("asdas", contour_mask)
                cv2.waitKey()

            # findContours returns (image, contours, hierarchy) on OpenCV 3
            # and (contours, hierarchy) otherwise; [-2] is the contour list
            # in both layouts.
            contours = cv2.findContours(contour_mask, cv2.RETR_TREE,
                                        cv2.CHAIN_APPROX_SIMPLE)[-2]
            print(contours)
            if len(contours) == 0:
                print('No contour found in mask, skipping: ', img_name)
                continue
            # max() returns the first contour with the most points, the same
            # element a stable descending sort puts at index 0.
            largest = max(contours, key=len)
            x = [point[0][0] for point in largest]
            y = [point[0][1] for point in largest]
            xmin = min(x)
            xmax = max(x)
            ymin = min(y)
            ymax = max(y)

            object_name = 'passport'
            pixel_val = 255

            with tensorflow.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_image_data = fid.read()
            key = hashlib.sha256(encoded_image_data).hexdigest()

            with tensorflow.io.gfile.GFile('mask/' + filename + ".jpg",
                                           'rb') as fid:
                encoded_mask_data = fid.read()
            mask_np = np.asarray(
                Image.open(io.BytesIO(encoded_mask_data)).convert('L'))
            # Binary instance mask: 1 where the mask pixel equals pixel_val.
            mask_remapped = (mask_np == pixel_val).astype(np.uint8)
            output = io.BytesIO()
            Image.fromarray(mask_remapped).save(output, format='PNG')

            xmins = [xmin / width]
            xmaxs = [xmax / width]
            ymins = [ymin / height]
            ymaxs = [ymax / height]
            classes_text = [object_name.encode('utf8')]
            classes = [1]
            masks = [output.getvalue()]

            print(img_name)
            print(xmins)
            print(xmaxs)
            print(ymins)
            print(ymaxs)
            print(classes_text)
            print(classes)
            print(masks)

            encoded_name = img_name.encode('utf8')
            example = tensorflow.train.Example(
                features=tensorflow.train.Features(
                    feature={
                        'image/height': dataset_util.int64_feature(height),
                        'image/width': dataset_util.int64_feature(width),
                        'image/filename':
                            dataset_util.bytes_feature(encoded_name),
                        'image/source_id':
                            dataset_util.bytes_feature(encoded_name),
                        'image/key/sha256':
                            dataset_util.bytes_feature(key.encode('utf8')),
                        'image/encoded':
                            dataset_util.bytes_feature(encoded_image_data),
                        'image/format':
                            dataset_util.bytes_feature(
                                'jpeg'.encode('utf8')),
                        'image/object/bbox/xmin':
                            dataset_util.float_list_feature(xmins),
                        'image/object/bbox/xmax':
                            dataset_util.float_list_feature(xmaxs),
                        'image/object/bbox/ymin':
                            dataset_util.float_list_feature(ymins),
                        'image/object/bbox/ymax':
                            dataset_util.float_list_feature(ymaxs),
                        'image/object/class/text':
                            dataset_util.bytes_list_feature(classes_text),
                        'image/object/class/label':
                            dataset_util.int64_list_feature(classes),
                        'image/object/mask':
                            dataset_util.bytes_list_feature(masks),
                    }))
            serialized = example.SerializeToString()

            # Route by position in the listing: 80% train, 10% valid, rest
            # test; the shard index scales that position to the shard count.
            if number < length * 0.8:
                shard = int(number / (length * 0.8) * num_data)
                train_writers[shard].write(serialized)
            elif number < length * 0.9:
                shard = int(
                    (number - length * 0.8) / (length * 0.1) * num_data / 8)
                valid_writers[shard].write(serialized)
            elif number < length:
                shard = int(
                    (number - length * 0.9) / (length * 0.1) * num_data / 8)
                test_writers[shard].write(serialized)
    finally:
        # The original never closed its writers; close them all here.
        for writer in opened:
            writer.close()
def create_tf_example(group, path, class_dict):
    """Build a tf.train.Example for one image and its annotation rows.

    Args:
        group: Object with a ``filename`` attribute and an ``object``
            attribute that supports ``iterrows()``. Each row needs a
            'class' entry and either the relative columns
            ('xmin_rel', 'xmax_rel', 'ymin_rel', 'ymax_rel'), used as-is,
            or the pixel columns ('xmin', 'xmax', 'ymin', 'ymax'), which
            are divided by the image size. Relative columns win when both
            sets are present.
        path: Directory containing the image file.
        class_dict: Mapping from class name to integer class id.

    Returns:
        The tf.train.Example, or None if the image cannot be opened.

    Raises:
        ValueError: if a row has neither set of coordinate columns.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    try:
        image = Image.open(encoded_jpg_io)
    except Exception as ex:
        # Deliberate best-effort: an unreadable image is reported and
        # skipped so that a conversion run can continue.
        print(ex)
        print('Invalid image, skipping: ', group.filename)
        return None
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    relative_columns = {'xmin_rel', 'xmax_rel', 'ymin_rel', 'ymax_rel'}
    pixel_columns = {'xmin', 'xmax', 'ymin', 'ymax'}

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for _, row in group.object.iterrows():
        available = set(row.index)
        if relative_columns.issubset(available):
            xmin = row['xmin_rel']
            xmax = row['xmax_rel']
            ymin = row['ymin_rel']
            ymax = row['ymax_rel']
        elif pixel_columns.issubset(available):
            xmin = row['xmin'] / width
            xmax = row['xmax'] / width
            ymin = row['ymin'] / height
            ymax = row['ymax'] / height
        else:
            # Previously this fell through with the box variables unbound.
            raise ValueError(
                'Annotation for {} has neither relative nor pixel '
                'bounding-box columns'.format(group.filename))
        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_dict[row['class']])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Build a tf.train.Example from an image record and its annotations.

    Args:
        image: Dict with the keys 'height', 'width' and 'filename'.
        annotations_list: Iterable of dicts with the keys 'xmin', 'xmax',
            'ymin', 'ymax' (pixels), 'label_text' and 'label'.
        image_dir: Directory containing the image file.
        category_index: Unused; kept for signature compatibility.
        include_masks: Unused; kept for signature compatibility.

    Returns:
        A tf.train.Example with the encoded image, its metadata and the
        boxes normalized by the image size.
    """
    height = image['height']
    width = image['width']
    filename = image['filename']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Kept from the original flow: PIL raises here if the bytes are not a
    # recognizable image. The result is not used, so it no longer shadows
    # the `image` argument.
    PIL.Image.open(io.BytesIO(encoded_jpg))

    # splitext keeps the leading dot ('.jpg'); store the bare, lower-case
    # extension so the format reads 'jpg' rather than '.jpg'.
    image_format = os.path.splitext(filename)[1].lstrip('.').lower()

    xmins = []  # Normalized left x coordinates (1 per box)
    xmaxs = []  # Normalized right x coordinates (1 per box)
    ymins = []  # Normalized top y coordinates (1 per box)
    ymaxs = []  # Normalized bottom y coordinates (1 per box)
    classes_text = []  # Class names as bytes (1 per box)
    classes = []  # Integer class ids (1 per box)

    for annotation in annotations_list:
        xmins.append(annotation['xmin'] / width)
        xmaxs.append(annotation['xmax'] / width)
        ymins.append(annotation['ymin'] / height)
        ymaxs.append(annotation['ymax'] / height)
        classes_text.append(annotation['label_text'].encode('utf8'))
        classes.append(annotation['label'])

    encoded_filename = filename.encode('utf8')
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(encoded_filename),
            'image/source_id': dataset_util.bytes_feature(encoded_filename),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format':
                dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url',
        u'height', u'width', u'date_captured', u'flickr_url', u'id',
        u'not_exhaustive_category_ids', u'neg_category_ids']
      image_dir: directory containing the image files. The file is looked
        up under the last two path components of image['coco_url'].
      bbox_annotations: list of dicts with keys: [u'segmentation', u'area',
        u'image_id', u'bbox', u'category_id', u'id']. Boxes are given as
        [x, y, width, height] in absolute coordinates with (x, y) the
        top-left corner. They are converted to xmin/xmax/ymin/ymax
        normalized by the image size and clipped to [0, 1]; boxes that are
        empty after clipping are dropped.
      category_index: a dict containing LVIS category information keyed by
        the 'id' field of each category. See the
        label_map_util.create_category_index function.
      include_mask: Whether to include instance segmentations masks (PNG
        encoded) in the result. default: False.

    Returns:
      success: whether the conversion is successful
      filename: image filename (None if the image file does not exist)
      example: The converted tf.Example (None if the image file does not
        exist)
    """
    img_h = image['height']
    img_w = image['width']
    filename = osp.join(*image['coco_url'].split('/')[-2:])
    image_id = image['id']
    not_exhaustive_ids = image['not_exhaustive_category_ids']
    neg_ids = image['neg_category_ids']

    full_path = os.path.join(image_dir, filename)
    if not tf.gfile.Exists(full_path):
        tf.logging.warn(f'image {full_path} not exists! skip')
        return False, None, None

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    feature_dict = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/not_exhaustive_category_ids':
            dataset_util.int64_list_feature(not_exhaustive_ids),
        'image/image_neg_category_ids':
            dataset_util.int64_list_feature(neg_ids),
    }

    if bbox_annotations:
        lefts = []
        rights = []
        tops = []
        bottoms = []
        crowd_flags = []
        names = []
        ids = []
        areas = []
        mask_pngs = []

        for ann in bbox_annotations:
            x, y, box_w, box_h = tuple(ann['bbox'])
            # Normalize, then clip the box to the image.
            left = max(float(x) / img_w, 0.0)
            right = min(float(x + box_w) / img_w, 1.0)
            top = max(float(y) / img_h, 0.0)
            bottom = min(float(y + box_h) / img_h, 1.0)
            if right <= left or bottom <= top:
                # Nothing of the box is left inside the image.
                continue

            lefts.append(left)
            rights.append(right)
            tops.append(top)
            bottoms.append(bottom)
            crowd_flags.append(0)
            category_id = int(ann['category_id'])
            ids.append(category_id)
            names.append(category_index[category_id]['name'].encode('utf8'))
            areas.append(ann['area'])

            if include_mask:
                rle = mask.frPyObjects(ann['segmentation'], img_h, img_w)
                # Collapse the decoded layers along axis 2 into one mask.
                merged = np.amax(mask.decode(rle), axis=2)
                png_buffer = io.BytesIO()
                PIL.Image.fromarray(merged).save(png_buffer, format='PNG')
                mask_pngs.append(png_buffer.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
            'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
            'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
            'image/object/bbox/ymax':
                dataset_util.float_list_feature(bottoms),
            'image/object/class/text':
                dataset_util.bytes_list_feature(names),
            'image/object/class/label': dataset_util.int64_list_feature(ids),
            'image/object/is_crowd':
                dataset_util.int64_list_feature(crowd_flags),
            'image/object/area': dataset_util.float_list_feature(areas),
        })
        if include_mask:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(mask_pngs))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return True, filename, example
def dict_to_tf_example(data,
                       image_path,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='images'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates provided
    by the raw data, dividing them by the width and height in data['size'].

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). data['size'] is
        required; data['object'] is optional.
      image_path: Full path to image file.
      label_map_dict: A map from string label names to integers ids.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False). Currently has no effect: the 'difficult'
        field of the annotations is not read, so every object is recorded as
        not difficult.
      image_subdirectory: Unused; accepted only so that existing callers keep
        working.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image at image_path is not a JPEG.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level imports.
    import os

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    # basename honours the platform's path separators, whereas splitting on
    # '/' returned the whole path for backslash-separated paths.
    filename = os.path.basename(image_path)

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []  # never filled: the 'pose' field is not read
    difficult_obj = []
    for obj in data.get('object', ()):
        # The 'difficult' field is deliberately not read; see the docstring.
        difficult = False
        if ignore_difficult_instances and difficult:
            continue
        box = obj['bndbox']

        difficult_obj.append(int(difficult))
        xmin.append(float(box['xmin']) / width)
        ymin.append(float(box['ymin']) / height)
        xmax.append(float(box['xmax']) / width)
        ymax.append(float(box['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])
        # The 'truncated' field is not read either; 0 is stored per object.
        truncated.append(0)

    encoded_filename = filename.encode('utf8')
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(encoded_filename),
            'image/source_id':
            dataset_util.bytes_feature(encoded_filename),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
def create_tf_example(image_det, image_path, pdt):
    """Build a tf.train.Example for one image and its detection rows.

    Args:
        image_det: table whose iterrows() yields (index, row) pairs; each row
            is read through the keys 'XMin', 'XMax', 'YMin', 'YMax' and
            'LabelName'.
        image_path: path of the image file; it is read once through
            tf.gfile.Open for the raw bytes and once through Image.open for
            the dimensions.
        pdt: table with a 'labelid' column and an `id` column, used to turn a
            label name into its integer class id.

    Returns:
        The tf.train.Example describing the image and its boxes.
    """
    with tf.gfile.Open(image_path, 'rb') as handle:
        raw_image = handle.read()
    with Image.open(image_path) as picture:
        img_width, img_height = picture.size

    # The format is recorded as JPEG regardless of the file extension.
    fmt = b'jpeg'
    # Only the base name of the path is stored in the record.
    name_bytes = os.path.basename(image_path).encode("utf-8")

    # One entry per bounding box in each of the lists below. Coordinates are
    # stored exactly as found in the rows (assumed already normalized --
    # TODO confirm against the producer of image_det).
    lefts, rights, tops, bottoms = [], [], [], []
    label_names = []  # class name of each box, as bytes
    label_ids = []  # integer class id of each box

    for _, det in image_det.iterrows():
        left = det['XMin']
        right = det['XMax']
        top = det['YMin']
        bottom = det['YMax']
        label = det['LabelName']
        label_bytes = label.encode("utf-8")
        # First id among the pdt rows whose labelid matches this label.
        numeric_id = pdt[pdt['labelid'] == label].id.values[0]

        lefts.append(left)
        rights.append(right)
        tops.append(top)
        bottoms.append(bottom)
        label_names.append(label_bytes)
        label_ids.append(numeric_id)

    print("\nimage : {}".format(image_path))
    print("classes : {}".format(label_names))
    print('classes_num : {}\n'.format(label_ids))

    feature_map = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(raw_image),
        'image/format': dataset_util.bytes_feature(fmt),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text':
        dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
        dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def _create_tf_entry(self, categories, img, label, filename, annotations):
    """Build a tf.train.Example for one labelled image, or None without boxes.

    The image is re-encoded as JPEG. Every annotation whose data is a
    Rectangle (or a Polygon, through its `rect`) is trimmed to the image
    bounds, validated and normalized by the image size. All boxes share the
    single class given by `label`.

    Args:
        categories: sequence of category names; the class id written to the
            record is the position of `label` in it, plus one.
        img: image object providing `size` and `save`.
        label: category name applied to every box.
        filename: name stored as image/filename and image/source_id.
        annotations: iterable of objects with a `data` attribute.

    Returns:
        The tf.train.Example, or None when no usable box was found.

    Raises:
        ImageMonkeyGeneralError: if a trimmed box has a negative dimension,
            exceeds the image, or has min > max after normalization.
    """
    img_width, img_height = img.size
    jpeg_buffer = io.BytesIO()
    img.save(jpeg_buffer, format='JPEG')
    jpeg_bytes = jpeg_buffer.getvalue()

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    for annotation in annotations:
        shape = annotation.data
        # Only exact Rectangle and Polygon annotations are supported for now.
        if type(shape) is Rectangle:
            rect = shape
        elif type(shape) is Polygon:
            rect = shape.rect
        else:
            continue

        # Clip the box to the image in case the annotation exceeds it.
        clipped = rect.trim(Rectangle(0, 0, img_width, img_height))
        right = clipped.left + clipped.width
        bottom = clipped.top + clipped.height

        # Checked in order; the first failing condition is reported.
        dimension_checks = (
            (clipped.left < 0, "trimmed rect left dimension invalid! (<0)"),
            (clipped.top < 0, "trimmed rect top dimension invalid! (<0)"),
            (clipped.width < 0,
             "trimmed rect width dimension invalid! (<0)"),
            (clipped.height < 0,
             "trimmed rect height dimension invalid! (<0)"),
            (right > img_width, "bounding box width > image width!"),
            (bottom > img_height, "bounding box height > image height!"),
        )
        for violated, message in dimension_checks:
            if violated:
                raise ImageMonkeyGeneralError(message)

        xmin = clipped.left / float(img_width)
        xmax = right / float(img_width)
        ymin = clipped.top / float(img_height)
        ymax = bottom / float(img_height)

        # Sanity checks on the normalized box.
        if xmin > xmax:
            raise ImageMonkeyGeneralError("xmin > xmax!")
        if ymin > ymax:
            raise ImageMonkeyGeneralError("ymin > ymax!")

        # Skip boxes whose four coordinates are all zero.
        if all(value == 0 for value in (xmin, xmax, ymin, ymax)):
            continue

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)

    # Some images in the dataset carry no usable annotation; skip those.
    if not xmins:
        return None

    box_count = len(xmins)
    class_index = categories.index(label) + 1  # class indexes start with 1
    classes = [class_index] * box_count
    labels = [label.encode('utf8')] * box_count
    name_bytes = filename.encode()

    feature_map = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(labels),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def create_tf_example(
        output_path="/Data2TB/chl_data/rgb/train/augmented/train.record",
        annotations_path=(
            "/Data2TB/chl_data/rgb/train/augmented/train_pos_neg.json"),
        image_dir="/Data2TB/chl_data/rgb/train/augmented/pos_neg_png/"):
    """Write one tf.train.Example per annotated frame to a TFRecord file.

    The annotation JSON must hold a 'frames' list. Each frame provides
    'height', 'width', 'file' and an 'annotations' list whose entries carry
    'label', 'x', 'y', 'width' and 'height'. Only annotations labelled 'Head'
    are exported, all with class id 1. Box corners are normalized by the
    frame size and clamped to [0, 1].

    Args:
        output_path: TFRecord file to create.
        annotations_path: JSON file describing the frames.
        image_dir: directory containing the files named by each frame's
            'file' entry.

    Returns:
        None. The records are written to output_path.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level imports.
    import os

    def clamp_unit(value):
        # Keep a normalized coordinate inside [0, 1].
        return min(max(value, 0.0), 1.0)

    # Load the annotations before creating the writer, so a missing or
    # malformed JSON file does not leave an empty output file behind.
    with open(annotations_path) as f:
        frames = json.load(f)['frames']

    writer = tf.python_io.TFRecordWriter(output_path)
    try:
        for i, frame in enumerate(frames):
            height = frame["height"]  # Image height
            width = frame["width"]  # Image width
            filename_only = frame['file']
            print(str(i) + ": " + filename_only)
            filename = os.path.join(image_dir, filename_only)
            with tf.gfile.GFile(filename, 'rb') as fid:
                encoded_image = fid.read()

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes = []  # integer class id of each box (1 per box)
            for annotation in frame['annotations']:
                if annotation['label'] != 'Head':
                    continue
                left = annotation['x']
                top = annotation['y']
                right = left + annotation['width']
                bottom = top + annotation['height']
                xmins.append(clamp_unit(left / width))
                xmaxs.append(clamp_unit(right / width))
                ymins.append(clamp_unit(top / height))
                ymaxs.append(clamp_unit(bottom / height))
                classes.append(1)

            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/filename':
                    dataset_util.bytes_feature(filename.encode()),
                    'image/height':
                    dataset_util.int64_feature(height),
                    'image/width':
                    dataset_util.int64_feature(width),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_image),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                    'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
                }))
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when a frame fails, so the records written so
        # far are flushed and the file handle is released.
        writer.close()
def json_to_record(j):
    """Convert one annotation record into a tf.train.Example.

    Args:
        j: mapping with the keys "image_size" (a one-element sequence whose
            item holds "height" and "width"), "categories", "annotations"
            (each entry holding a "class_id") and "file" (path of the image).

    Returns:
        The tf.train.Example holding the image bytes and its boxes, with the
        box corners divided by the image width and height.
    """
    sizes = j["image_size"]
    assert len(sizes) == 1
    assert len(j["categories"]) == len(j["annotations"])

    dims = sizes[0]
    img_height, img_width = dims["height"], dims["width"]
    base_name = os.path.basename(j["file"])

    # The raw file bytes are stored in the record unchanged.
    with tf.gfile.GFile(j["file"], "rb") as fid:
        image_bytes = fid.read()

    lefts, rights, tops, bottoms = [], [], [], []
    label_names, label_ids = [], []
    for ann in j["annotations"]:
        label_names.append(class_id_to_name(ann["class_id"]).encode("utf8"))
        # class ids are shifted by one: tensorflow labels start at 1
        label_ids.append(ann["class_id"] + 1)

        box = get_box_corners(ann)
        lefts.append(box["xmin"] / img_width)
        rights.append(box["xmax"] / img_width)
        tops.append(box["ymin"] / img_height)
        bottoms.append(box["ymax"] / img_height)

    name_bytes = base_name.encode("utf8")
    feature_map = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename': dataset_util.bytes_feature(name_bytes),
        'image/source_id': dataset_util.bytes_feature(name_bytes),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        'image/format': dataset_util.bytes_feature(b'jpeg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text':
        dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
        dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def taco_to_tfrecord(self, dataset_dir, round, subset, tf_output,
                     class_ids=None, class_map=None, return_taco=False,
                     auto_download=False):
    """Load a subset of the TACO dataset and convert it to a TFRecord file.

    Two files are written: `<tf_output>_classes.names`, holding one category
    name per line ordered by category id (Background included), and
    `<tf_output>_<subset>.bin`, holding one tf.train.Example per image that
    has at least one non-Background category.

    Args:
        dataset_dir: The root directory of the TACO dataset.
        round: split number. When None the annotations are read from
            `annotations_<subset>.json`, otherwise from
            `annotations_<round>_<subset>.json`.
        subset: which subset to load (train, val, test).
        tf_output: path prefix of the two output files.
        class_ids: ignored; every category id of the dataset is used.
        class_map: dictionary used to assign original classes to the new
            class system; passed to self.replace_dataset_classes.
        return_taco: unused; nothing is returned.
        auto_download: unused; automatic download is not implemented.

    Raises:
        ValueError: if tf_output is empty, or an image is not a JPEG.
        AssertionError: if the annotation file does not exist.
    """
    if not tf_output:
        raise ValueError("please provide a tf_output prefix parameter")

    ann_filepath = os.path.join(dataset_dir, 'annotations_')
    if round is not None:
        ann_filepath += str(round) + "_" + subset + ".json"
    else:
        ann_filepath += subset + ".json"
    assert os.path.isfile(ann_filepath), (
        "annotation file not found: " + ann_filepath)

    # Load dataset; the context manager closes the file handle.
    with open(ann_filepath, 'r') as ann_file:
        dataset = json.load(ann_file)

    # Replace dataset original classes before building the COCO index.
    # Some classes may be assigned Background to remove them from the dataset.
    self.replace_dataset_classes(dataset, class_map)

    taco_alla_coco = COCO()
    taco_alla_coco.dataset = dataset
    taco_alla_coco.createIndex()

    # Register every class except Background and collect the images that
    # contain at least one such class.
    all_class_ids = sorted(taco_alla_coco.getCatIds())
    image_ids = []
    for cat_id in all_class_ids:
        class_name = taco_alla_coco.loadCats(cat_id)[0]["name"]
        if class_name == 'Background':
            continue
        self.add_class("taco", cat_id, class_name)
        image_ids.extend(taco_alla_coco.getImgIds(catIds=cat_id))
    image_ids = list(set(image_ids))

    print('Number of images used:', len(image_ids))

    # Write all the class names, ordered by id. Background is included in the
    # label file; no box is ever written for it.
    with open(tf_output + "_classes.names", "w") as f:
        for class_id in all_class_ids:
            f.write(taco_alla_coco.cats[class_id]['name'] + "\n")

    writer = tf.io.TFRecordWriter(tf_output + "_" + subset + ".bin")
    try:
        for image_id in image_ids:
            img = taco_alla_coco.imgs[image_id]
            height = img["height"]
            width = img["width"]
            annotations = taco_alla_coco.imgToAnns[img['id']]

            # Per-image feature lists, one entry per kept annotation.
            xminl = []
            yminl = []
            xmaxl = []
            ymaxl = []
            catl = []  # numeric category
            labell = []  # category name
            # Pascal VOC style fields, always filled with defaults here.
            viewl = []
            truncatedl = []
            difficultl = []

            for ann in annotations:
                if ann['category_id'] == 0:
                    # Category 0 is what replaced classes map to when they
                    # are turned into Background; no box is learnt for it.
                    continue
                catl.append(ann['category_id'])
                cat = taco_alla_coco.cats[ann['category_id']]
                labell.append(cat['name'].encode('utf8'))

                # Boxes are stored as (x, y, width, height) in pixels.
                xmin, ymin, bbox_width, bbox_height = tuple(ann['bbox'])
                xminl.append(float(xmin) / width)
                yminl.append(float(ymin) / height)
                xmaxl.append(float(xmin + bbox_width) / width)
                ymaxl.append(float(ymin + bbox_height) / height)

                viewl.append("".encode('utf8'))
                truncatedl.append(0)
                difficultl.append(0)

            image_path = os.path.join(dataset_dir, img['file_name'])
            filename = image_path.encode('utf8')
            with tf.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_jpg = fid.read()
            image = PIL.Image.open(io.BytesIO(encoded_jpg))
            if image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            key = hashlib.sha256(encoded_jpg).hexdigest()

            example = tf.train.Example(features=tf.train.Features(feature={
                'image/height': dataset_util.int64_feature(height),
                'image/width': dataset_util.int64_feature(width),
                'image/filename': dataset_util.bytes_feature(filename),
                'image/source_id': dataset_util.bytes_feature(filename),
                'image/key/sha256':
                dataset_util.bytes_feature(key.encode('utf8')),
                'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xminl),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxl),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(yminl),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxl),
                'image/object/class/text':
                dataset_util.bytes_list_feature(labell),
                'image/object/class/label':
                dataset_util.int64_list_feature(catl),
                # Kept so the record looks like a Pascal VOC example; the
                # values are always the defaults set above.
                'image/object/difficult':
                dataset_util.int64_list_feature(difficultl),
                'image/object/truncated':
                dataset_util.int64_list_feature(truncatedl),
                'image/object/view':
                dataset_util.bytes_list_feature(viewl),
            }))
            writer.write(example.SerializeToString())
    finally:
        # Close the writer even when an image fails validation, so records
        # already written are flushed and the handle is released.
        writer.close()