def dict_to_tf_example(img_path, labels, sp):
    """Pack one JPEG image and its axis-aligned boxes into a tf.Example.

    Box coordinates are divided by the width/height of the decoded image, so
    the stored values are relative to the image size.

    Args:
      img_path: Path of the image file; it is read through tf.gfile.GFile.
      labels: Iterable of strings, one per box. Each string is split on `sp`
        and its first four fields are parsed with int() as
        xmin, ymin, xmax, ymax.
      sp: Separator handed to str.split() for every label string.

    Returns:
      example: tf.train.Example carrying the encoded image bytes, the format
        tag 'jpg', the four normalized coordinate lists and a class label of
        1 for every box.

    Raises:
      ValueError: if PIL does not report the image as a JPEG, if a label
        yields fewer than four fields, or if a field is not an integer.
    """
    with tf.gfile.GFile(img_path, 'rb') as jpeg_file:
        encoded_jpg = jpeg_file.read()

    picture = PIL.Image.open(io.BytesIO(encoded_jpg))
    if picture.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if picture.mode != 'RGB':
        picture = picture.convert('RGB')
    img_w, img_h = picture.size

    # One (xmin, ymin, xmax, ymax) tuple of normalized values per label line.
    boxes = []
    for line in labels:
        left, top, right, bottom = line.split(sp)[:4]
        boxes.append((
            int(left) / img_w,
            int(top) / img_h,
            int(right) / img_w,
            int(bottom) / img_h,
        ))

    features = {
        'image/encoded':
            tfrecord_utils.bytes_feature(encoded_jpg),
        'image/format':
            tfrecord_utils.bytes_feature('jpg'.encode('utf8')),
        'image/object/bbox/xmin':
            tfrecord_utils.float_list_feature([box[0] for box in boxes]),
        'image/object/bbox/xmax':
            tfrecord_utils.float_list_feature([box[2] for box in boxes]),
        'image/object/bbox/ymin':
            tfrecord_utils.float_list_feature([box[1] for box in boxes]),
        'image/object/bbox/ymax':
            tfrecord_utils.float_list_feature([box[3] for box in boxes]),
        # Every box is stored with the same class id, 1.
        'image/object/class/label':
            tfrecord_utils.int64_list_feature([1] * len(boxes)),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _create_tf_example(entry):
    """Create the tf.train.Example stored in the TFRecord file for one entry.

    Args:
        entry: string containing the path to an image and its label; it is
            decoded by `_get_image_and_label_from_entry`.

    Return:
        tf_example: tf.train.Example holding the raw pixel bytes under
            'image', the first two dimensions of the pixel array under
            'image/height' and 'image/width', and the label under 'label'.
    """
    image_path, label = _get_image_and_label_from_entry(entry)

    # Decode the image into a pixel array. The context manager releases the
    # underlying file handle once the pixels have been copied out.
    with Image.open(image_path) as image:
        image_np = np.array(image)

    # tobytes() replaces the deprecated ndarray.tostring(); both return the
    # same raw buffer contents.
    image_raw = image_np.tobytes()

    # Data which is going to be stored in the TFRecord file
    feature = {
        'image': tfrecord_utils.bytes_feature(image_raw),
        'image/height': tfrecord_utils.int64_feature(image_np.shape[0]),
        'image/width': tfrecord_utils.int64_feature(image_np.shape[1]),
        'label': tfrecord_utils.int64_feature(label),
    }

    tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
    return tf_example
def dict_to_tf_example(img_path, labels):
    """Pack one JPEG image and its quadrilateral boxes into a tf.Example.

    Corner coordinates are divided by the width/height of the decoded image
    before being stored.

    Args:
      img_path: Path of the image file; it is read through tf.gfile.GFile.
      labels: Array-like exposing `ndim`/`shape`. With ndim == 3 it is
        indexed as labels[axis][corner][box] (layout [2, 4, num_boxes]);
        otherwise it is indexed as labels[axis][corner] and treated as a
        single box. Axis 0 is divided by the width (x), axis 1 by the
        height (y).

    Returns:
      example: tf.train.Example carrying the encoded image bytes, the format
        tag 'jpg', the eight normalized corner coordinate lists
        (x0..x3, y0..y3) and a class label of 1 for every box.

    Raises:
      ValueError: if PIL does not report the image as a JPEG.
    """
    with tf.gfile.GFile(img_path, 'rb') as jpeg_file:
        encoded_jpg = jpeg_file.read()

    picture = PIL.Image.open(io.BytesIO(encoded_jpg))
    if picture.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if picture.mode != 'RGB':
        picture = picture.convert('RGB')
    img_w, img_h = picture.size

    # corner_x[k] / corner_y[k] collect the k-th corner of every box.
    corner_x = [[], [], [], []]
    corner_y = [[], [], [], []]
    class_ids = []

    if labels.ndim == 3:
        # labels : [2, 4, num_boxes]
        for box in range(labels.shape[2]):
            for corner in range(4):
                corner_y[corner].append(labels[1][corner][box] / img_h)
                corner_x[corner].append(labels[0][corner][box] / img_w)
            class_ids.append(1)
    else:
        # Single box: labels[axis][corner]
        for corner in range(4):
            corner_y[corner].append(labels[1][corner] / img_h)
            corner_x[corner].append(labels[0][corner] / img_w)
        class_ids.append(1)

    features = {
        'image/encoded': tfrecord_utils.bytes_feature(encoded_jpg),
        'image/format': tfrecord_utils.bytes_feature('jpg'.encode('utf8')),
        'image/object/bbox/y0': tfrecord_utils.float_list_feature(corner_y[0]),
        'image/object/bbox/x0': tfrecord_utils.float_list_feature(corner_x[0]),
        'image/object/bbox/y1': tfrecord_utils.float_list_feature(corner_y[1]),
        'image/object/bbox/x1': tfrecord_utils.float_list_feature(corner_x[1]),
        'image/object/bbox/y2': tfrecord_utils.float_list_feature(corner_y[2]),
        'image/object/bbox/x2': tfrecord_utils.float_list_feature(corner_x[2]),
        'image/object/bbox/y3': tfrecord_utils.float_list_feature(corner_y[3]),
        'image/object/bbox/x3': tfrecord_utils.float_list_feature(corner_x[3]),
        'image/object/class/label':
            tfrecord_utils.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data, using the width/height found in
    data['size'].

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict). The keys read
        here are 'filename', 'size' and, when present, 'object'.
      label_map_dict: A map from string label names to integer ids. The
        stored class label is the mapped id minus one.
      image_subdirectory: Directory joined with data['filename'] to locate
        the image; the file extension is replaced by '.jpg'.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).

    Returns:
      example: The converted tf.Example. An annotation without an 'object'
        entry produces an example with empty box/label lists.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a
        valid JPEG
    """
    img_path = os.path.splitext(
        os.path.join(image_subdirectory, data['filename']))[0] + ".jpg"
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if image.mode != 'RGB':
        # The converted image is not used below; the record stores the
        # original encoded bytes. Kept so the same inputs are accepted or
        # rejected as before.
        image = image.convert('RGB')

    # generate hash key for image
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    difficult_obj = []

    # Annotations of images without any object carry no 'object' key;
    # treat that as "no boxes" instead of failing with KeyError.
    objects = data['object'] if 'object' in data else []
    for obj in objects:
        difficult = bool(int(obj['difficult']))
        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        bndbox = obj['bndbox']
        xmin.append(float(bndbox['xmin']) / width)
        ymin.append(float(bndbox['ymin']) / height)
        xmax.append(float(bndbox['xmax']) / width)
        ymax.append(float(bndbox['ymax']) / height)

        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        # Stored labels are shifted down by one relative to the label map.
        classes.append(int(label_map_dict[class_name]) - 1)

    filename_bytes = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
                tfrecord_utils.int64_feature(height),
            'image/width':
                tfrecord_utils.int64_feature(width),
            'image/filename':
                tfrecord_utils.bytes_feature(filename_bytes),
            'image/source_id':
                tfrecord_utils.bytes_feature(filename_bytes),
            'image/key/sha256':
                tfrecord_utils.bytes_feature(key.encode('utf8')),
            'image/encoded':
                tfrecord_utils.bytes_feature(encoded_jpg),
            'image/format':
                tfrecord_utils.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
                tfrecord_utils.float_list_feature(xmin),
            'image/object/bbox/xmax':
                tfrecord_utils.float_list_feature(xmax),
            'image/object/bbox/ymin':
                tfrecord_utils.float_list_feature(ymin),
            'image/object/bbox/ymax':
                tfrecord_utils.float_list_feature(ymax),
            'image/object/class/text':
                tfrecord_utils.bytes_list_feature(classes_text),
            'image/object/class/label':
                tfrecord_utils.int64_list_feature(classes),
            'image/object/difficult':
                tfrecord_utils.int64_list_feature(difficult_obj),
        }))
    return example
def dict_to_tf_example(img_path, labels, sp):
    """Pack one JPEG image and its quadrilateral text boxes into a tf.Example.

    Corner coordinates are divided by the width/height of the decoded image
    before being stored.

    Args:
      img_path: Path of the image file; it is read through tf.gfile.GFile.
      labels: Iterable of strings, one per box. Each string is split on `sp`
        and its first nine fields are read as
        x0, y0, x1, y1, x2, y2, x3, y3, text. The coordinates are parsed
        with int().
      sp: Separator handed to str.split() for every label string.

    Returns:
      example: tf.train.Example carrying the encoded image bytes, the format
        tag 'jpg', the eight normalized corner coordinate lists and a class
        label of 1 for every kept box; or None when no box is kept.
        Boxes whose text field contains "###" are skipped.

    Raises:
      ValueError: if PIL does not report the image as a JPEG, if a label
        yields fewer than nine fields, or if a coordinate is not an integer.
    """
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if image.mode != 'RGB':
        image = image.convert('RGB')
    width, height = image.size

    x0 = []
    y0 = []
    x1 = []
    y1 = []
    x2 = []
    y2 = []
    x3 = []
    y3 = []
    classes = []

    for label in labels:
        _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3, txt = label.split(sp)[:9]

        if "###" in txt:
            continue

        try:
            _x0 = int(_x0)
        except ValueError:
            # Retry without the first character of the field.
            # NOTE(review): presumably this strips a byte-order mark at the
            # start of the annotation file - confirm against the data.
            _x0 = int(_x0[1:])

        _y0, _x1, _y1, _x2, _y2, _x3, _y3 = [
            int(p) for p in [_y0, _x1, _y1, _x2, _y2, _x3, _y3]
        ]

        y0.append(_y0 / height)
        x0.append(_x0 / width)
        y1.append(_y1 / height)
        x1.append(_x1 / width)
        y2.append(_y2 / height)
        x2.append(_x2 / width)
        y3.append(_y3 / height)
        x3.append(_x3 / width)
        classes.append(1)

    # Nothing left after filtering: signal the caller to skip this image.
    if not y0:
        return None

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded': tfrecord_utils.bytes_feature(encoded_jpg),
            'image/format': tfrecord_utils.bytes_feature('jpg'.encode('utf8')),
            'image/object/bbox/y0': tfrecord_utils.float_list_feature(y0),
            'image/object/bbox/x0': tfrecord_utils.float_list_feature(x0),
            'image/object/bbox/y1': tfrecord_utils.float_list_feature(y1),
            'image/object/bbox/x1': tfrecord_utils.float_list_feature(x1),
            'image/object/bbox/y2': tfrecord_utils.float_list_feature(y2),
            'image/object/bbox/x2': tfrecord_utils.float_list_feature(x2),
            'image/object/bbox/y3': tfrecord_utils.float_list_feature(y3),
            'image/object/bbox/x3': tfrecord_utils.float_list_feature(x3),
            'image/object/class/label':
                tfrecord_utils.int64_list_feature(classes),
        }))
    return example
def dict_to_tf_example(img_path, labels, image_size=None):
    """Pack one JPEG image with quadrilateral and centre/size boxes.

    All coordinates are normalized by the width/height of the decoded image.
    For every quadrilateral the enclosing axis-aligned box is also stored as
    centre (cx, cy) and extent (w, h).

    Args:
      img_path: Path of the image file; it is read through tf.gfile.GFile.
      labels: Array-like exposing `ndim`/`shape`. With ndim == 3 it is
        indexed as labels[axis][corner][box] (layout [2, 4, num_boxes]);
        otherwise it is indexed as labels[axis][corner] and treated as a
        single box. Axis 0 is x, axis 1 is y.
      image_size: Optional (width, height) pair. When given and both entries
        are set (non-None, non-zero), 'image/width' and 'image/height'
        record these values instead of the decoded image's size. The box
        coordinates are always normalized by the decoded image's size.

    Returns:
      example: tf.train.Example carrying the encoded image bytes, the format
        tag 'jpg', the recorded width/height, the eight normalized corner
        lists, a class label of 1 per box and the cx/cy/w/h lists.

    Raises:
      ValueError: if PIL does not report the image as a JPEG.
    """
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    if image.mode != 'RGB':
        image = image.convert('RGB')
    width, height = image.size

    # quadrilateral coordinates: q_x[k] / q_y[k] hold the k-th corner
    q_x = [[], [], [], []]
    q_y = [[], [], [], []]
    q_classes = []

    # enclosing axis-aligned box as centre / extent
    o_cx = []
    o_cy = []
    o_w = []
    o_h = []

    # Gather the four (x, y) corners of every box so that both input
    # layouts share the normalization code below.
    if labels.ndim == 3:
        # labels : [2, 4, num_boxes]
        quads = [
            ([labels[0][k][i] for k in range(4)],
             [labels[1][k][i] for k in range(4)])
            for i in range(labels.shape[2])
        ]
    else:
        # labels : [2, 4]
        quads = [
            ([labels[0][k] for k in range(4)],
             [labels[1][k] for k in range(4)])
        ]

    # Normalize all values to [0~1] relative to the decoded image size,
    # which keeps them valid if the image is resized later.
    for xs, ys in quads:
        for k in range(4):
            q_x[k].append(xs[k] / width)
            q_y[k].append(ys[k] / height)
        q_classes.append(1)

        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        o_cx.append((x_min + x_max) / (width * 2))
        o_cy.append((y_min + y_max) / (height * 2))
        o_w.append(abs(x_max - x_min) / width)
        o_h.append(abs(y_max - y_min) / height)

    # The default image_size=None used to crash on image_size[0]; only
    # override the recorded size when a complete pair was supplied.
    if image_size is not None and image_size[0] and image_size[1]:
        width, height = image_size
        # NOTE(review): the previous code called image.resize(...) here but
        # discarded the result, so the stored JPEG bytes were never resized
        # (and PIL.Image.ANTIALIAS no longer exists in recent Pillow).
        # The record keeps the original bytes with the requested size in
        # 'image/width'/'image/height' - confirm readers resize on load.

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded': tfrecord_utils.bytes_feature(encoded_jpg),
            'image/format': tfrecord_utils.bytes_feature('jpg'.encode('utf8')),
            'image/width': tfrecord_utils.int64_feature(width),
            'image/height': tfrecord_utils.int64_feature(height),
            'image/object/bbox/y0': tfrecord_utils.float_list_feature(q_y[0]),
            'image/object/bbox/x0': tfrecord_utils.float_list_feature(q_x[0]),
            'image/object/bbox/y1': tfrecord_utils.float_list_feature(q_y[1]),
            'image/object/bbox/x1': tfrecord_utils.float_list_feature(q_x[1]),
            'image/object/bbox/y2': tfrecord_utils.float_list_feature(q_y[2]),
            'image/object/bbox/x2': tfrecord_utils.float_list_feature(q_x[2]),
            'image/object/bbox/y3': tfrecord_utils.float_list_feature(q_y[3]),
            'image/object/bbox/x3': tfrecord_utils.float_list_feature(q_x[3]),
            'image/object/class/label':
                tfrecord_utils.int64_list_feature(q_classes),
            'image/object/bbox/cy': tfrecord_utils.float_list_feature(o_cy),
            'image/object/bbox/cx': tfrecord_utils.float_list_feature(o_cx),
            'image/object/bbox/w': tfrecord_utils.float_list_feature(o_w),
            'image/object/bbox/h': tfrecord_utils.float_list_feature(o_h),
        }))
    return example
def dict_to_tf_example(data, image_file_name, image_directory,
                       label_map_dict, coder):
    """Convert an annotation dict plus its image file to a tf.Example.

    Bounding box coordinates are normalized by the width/height found in
    data['size'].

    Args:
      data: dict of annotation fields for a single image. The keys read
        here are 'size', 'filename' and, when present, 'object'. Each
        object's 'name' is lower-cased in place.
      image_file_name: Image file name without extension; '.jpg' is tried
        first, then '.jpeg'.
      image_directory: Directory containing the image file.
      label_map_dict: A map from lower-cased label names to integer ids.
      coder: Unused by this function; kept for interface compatibility.

    Returns:
      example: The converted tf.Example, or the integer 0 when the image is
        skipped (file not found, zero width/height, or no objects).
    """
    full_path = os.path.join(image_directory, image_file_name + '.jpg')
    if not tf.gfile.Exists(full_path):
        # Fall back to the '.jpeg' spelling of the extension.
        full_path = full_path[:-3] + 'jpeg'
        if not tf.gfile.Exists(full_path):
            # Marker '1': neither image file exists.
            print('1')
            return 0

    # Read through a context manager so the file handle is always closed.
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])
    if width == 0 or height == 0:
        # Marker '2': coordinates cannot be normalized by a zero dimension.
        print('2')
        return 0

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []

    if 'object' in data:
        for obj in data['object']:
            # Written back into `data`, as before, so callers that inspect
            # the dict afterwards keep seeing lower-cased names.
            obj['name'] = obj['name'].lower()
            bndbox = obj['bndbox']
            xmin.append(float(bndbox['xmin']) / width)
            ymin.append(float(bndbox['ymin']) / height)
            xmax.append(float(bndbox['xmax']) / width)
            ymax.append(float(bndbox['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])

    # The lists above grow in lockstep, so an emptiness check on one of them
    # is the only skip condition that can actually trigger.
    if not classes:
        return 0

    if len(classes) >= 100:
        print('This image has more than 100 objects :', image_file_name)

    filename_bytes = data['filename'].encode('utf8')
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
                tfrecord_utils.int64_feature(height),
            'image/width':
                tfrecord_utils.int64_feature(width),
            'image/filename':
                tfrecord_utils.bytes_feature(filename_bytes),
            'image/source_id':
                tfrecord_utils.bytes_feature(filename_bytes),
            'image/key/sha256':
                tfrecord_utils.bytes_feature(key.encode('utf8')),
            'image/encoded':
                tfrecord_utils.bytes_feature(encoded_jpg),
            'image/format':
                tfrecord_utils.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
                tfrecord_utils.float_list_feature(xmin),
            'image/object/bbox/xmax':
                tfrecord_utils.float_list_feature(xmax),
            'image/object/bbox/ymin':
                tfrecord_utils.float_list_feature(ymin),
            'image/object/bbox/ymax':
                tfrecord_utils.float_list_feature(ymax),
            'image/object/class/text':
                tfrecord_utils.bytes_list_feature(classes_text),
            'image/object/class/label':
                tfrecord_utils.int64_list_feature(classes),
        }))
    return example