def testDecodeEmptyPngInstanceMasks(self):
    image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    encoded_masks = []
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/mask':
                    dataset_util.bytes_list_feature(encoded_masks),
                'image/height': dataset_util.int64_feature(10),
                'image/width': dataset_util.int64_feature(10),
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True,
        instance_mask_type=input_reader_pb2.PNG_MASKS)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    with self.test_session() as sess:
        tensor_dict = sess.run(tensor_dict)
        self.assertAllEqual(
            tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape,
            [0, 10, 10])

def create_tf_example(example, writer):
    height = example['height']
    width = example['width']
    filename = example['filename']
    encoded_image_data = example['encoded_image_data']
    image_format = example['image_format']
    bboxes = example['bbox']

    # Normalized box coordinates (one entry per box).
    xmins = [bbox[0] / float(width) for bbox in bboxes]   # left x
    xmaxs = [bbox[2] / float(width) for bbox in bboxes]   # right x
    ymins = [bbox[1] / float(height) for bbox in bboxes]  # top y
    ymaxs = [bbox[3] / float(height) for bbox in bboxes]  # bottom y

    classes_text = example['class_text']
    classes = example['class_idx']

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())

def create_tf_example(row):
    full_path = os.path.join(os.getcwd(), 'images', '{}'.format(row['filename']))
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = row['filename'].encode('utf8')
    image_format = b'jpg'
    xmins = [row['xmin'] / width]
    xmaxs = [row['xmax'] / width]
    ymins = [row['ymin'] / height]
    ymaxs = [row['ymax'] / height]
    classes_text = [row['class'].encode('utf8')]
    classes = [class_text_to_int(row['class'])]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

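# A minimal driver sketch for the row-based create_tf_example above,
# assuming a labels CSV with one row per image whose columns match the
# row[...] lookups used by the function; the 'labels.csv' / 'train.record'
# paths are placeholders, not part of the original snippet.
import pandas as pd
import tensorflow as tf


def write_records(csv_path, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    for _, row in pd.read_csv(csv_path).iterrows():
        # create_tf_example() reads the image from ./images/<filename>.
        writer.write(create_tf_example(row).SerializeToString())
    writer.close()


# write_records('labels.csv', 'train.record')
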
def testDecodeInstanceSegmentation(self):
    num_instances = 4
    image_height = 5
    image_width = 3
    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(
            2, size=(num_instances, image_height, image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])
    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/height': dataset_util.int64_feature(image_height),
                'image/width': dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True)
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    self.assertAllEqual(
        (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
         .get_shape().as_list()), [4, 5, 3])
    self.assertAllEqual(
        (tensor_dict[fields.InputDataFields.groundtruth_classes]
         .get_shape().as_list()), [4])
    with self.test_session() as sess:
        tensor_dict = sess.run(tensor_dict)
        self.assertAllEqual(
            instance_masks.astype(np.float32),
            tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
        self.assertAllEqual(
            object_classes,
            tensor_dict[fields.InputDataFields.groundtruth_classes])

def dict_to_tf_example(data, label_map_dict, data_dir):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.
    """
    img_path = os.path.join(data_dir, data.replace("mask", "images"))
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = 512
    height = 512

    classes = []
    classes_text = []
    encoded_mask_png_list = []

    # Read the single-channel mask, convert the 0/255 values to a 0/1 PIL
    # image, and re-encode it as PNG bytes.
    mask_png = cv2.imread(os.path.join(data_dir, data), 0)
    mask_img = PIL.Image.fromarray((mask_png / 255).astype(np.uint8))
    output = io.BytesIO()
    mask_img.save(output, format='PNG')
    encoded_mask_png_list.append(output.getvalue())

    class_name = 'water'
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(data.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(data.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example

def createTfExample(singleFileData, path):
    # use TensorFlow's GFile function to open the .jpg image matching the
    # current box data
    with tf.gfile.GFile(os.path.join(path, '{}'.format(singleFileData.filename)),
                        'rb') as tensorFlowImageFile:
        tensorFlowImage = tensorFlowImageFile.read()
    # end with

    # get the image width and height via converting from a TensorFlow image
    # to an io library BytesIO image, then to a PIL Image, then breaking out
    # the width and height
    bytesIoImage = io.BytesIO(tensorFlowImage)
    pilImage = Image.open(bytesIoImage)
    width, height = pilImage.size

    # get the file name from the file data passed in, and set the image
    # format to .jpg
    fileName = singleFileData.filename.encode('utf8')
    imageFormat = b'jpg'

    # declare empty lists for the box x, y, mins and maxes, and the class as
    # text and as an integer
    xMins = []
    xMaxs = []
    yMins = []
    yMaxs = []
    classesAsText = []
    classesAsInts = []

    # for each row in the current .xml file's data . . . (each row in the
    # .xml file corresponds to one box)
    for index, row in singleFileData.object.iterrows():
        xMins.append(row['xmin'] / width)
        xMaxs.append(row['xmax'] / width)
        yMins.append(row['ymin'] / height)
        yMaxs.append(row['ymax'] / height)
        classesAsText.append(row['class'].encode('utf8'))
        classesAsInts.append(classAsTextToClassAsInt(row['class']))
    # end for

    # finally we can calculate and return the TensorFlow Example
    tfExample = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fileName),
        'image/source_id': dataset_util.bytes_feature(fileName),
        'image/encoded': dataset_util.bytes_feature(tensorFlowImage),
        'image/format': dataset_util.bytes_feature(imageFormat),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xMins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xMaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(yMins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(yMaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classesAsText),
        'image/object/class/label': dataset_util.int64_list_feature(classesAsInts)}))
    return tfExample

def create_tf_example(filename, writer):
    lines = open(filename).readlines()
    image_filename = lines[0].strip()[1:]

    classes_text = []
    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []

    im = Image.open(image_filename)
    arr = io.BytesIO()
    im.save(arr, format='PNG')
    height = im.height
    width = im.width
    encoded_image_data = arr.getvalue()
    image_format = 'png'

    for line in lines[1:]:
        line = line.strip()
        if line == '':
            continue
        data = line.split(",")
        bbox = list(map(int, map(float, data[:4])))
        class_text = data[4].strip()
        class_idx = labels.index(class_text)
        classes_text.append(class_text)
        classes.append(class_idx)
        # Normalized box coordinates (one entry per box).
        xmins.append(bbox[0] / float(width))   # left x
        xmaxs.append(bbox[2] / float(width))   # right x
        ymins.append(bbox[1] / float(height))  # top y
        ymaxs.append(bbox[3] / float(height))  # bottom y

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())

def testInstancesNotAvailableByDefault(self):
    num_instances = 4
    image_height = 5
    image_width = 3
    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(
            2, size=(num_instances, image_height, image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])
    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/height': dataset_util.int64_feature(image_height),
                'image/width': dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    self.assertTrue(
        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)

def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                        'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

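# Hedged sketch of the grouping step that typically feeds the grouped
# create_tf_example above: collapse a per-box DataFrame (one row per
# bounding box) into one namedtuple per image. The helper name 'split' and
# the 'train_labels.csv' path are assumptions, not part of the original
# snippet.
import collections

import pandas as pd


def split(df, group_key):
    data = collections.namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group_key)
    return [data(filename, grouped.get_group(filename))
            for filename in grouped.groups]


# for group in split(pd.read_csv('train_labels.csv'), 'filename'):
#     tf_example = create_tf_example(group, os.path.join(os.getcwd(), 'images'))
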
def create_tf_record(self):
    path = os.path.join(self.get_temp_dir(), 'tfrecord')
    writer = tf.python_io.TFRecordWriter(path)
    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    flat_mask = (4 * 5) * [1.0]
    with self.test_session():
        encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/height': dataset_util.int64_feature(4),
        'image/width': dataset_util.int64_feature(5),
        'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
        'image/object/class/label': dataset_util.int64_list_feature([2]),
        'image/object/mask': dataset_util.float_list_feature(flat_mask),
    }))
    writer.write(example.SerializeToString())
    writer.close()
    return path

def prepare_example(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

    Args:
      image_path: The complete path to image.
      annotations: A dictionary representing the annotation of a single object
        that appears in the image.
      label_map_dict: A map from string label names to integer ids.

    Returns:
      example: The converted tf.Example.
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    image = np.asarray(image)

    key = hashlib.sha256(encoded_png).hexdigest()

    width = int(image.shape[1])
    height = int(image.shape[0])

    xmin_norm = annotations['2d_bbox_left'] / float(width)
    ymin_norm = annotations['2d_bbox_top'] / float(height)
    xmax_norm = annotations['2d_bbox_right'] / float(width)
    ymax_norm = annotations['2d_bbox_bottom'] / float(height)

    difficult_obj = [0] * len(xmin_norm)

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [x.encode('utf8') for x in annotations['type']]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [label_map_dict[x] for x in annotations['type']]),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.float_list_feature(
            annotations['truncated']),
        'image/object/alpha': dataset_util.float_list_feature(
            annotations['alpha']),
        'image/object/3d_bbox/height': dataset_util.float_list_feature(
            annotations['3d_bbox_height']),
        'image/object/3d_bbox/width': dataset_util.float_list_feature(
            annotations['3d_bbox_width']),
        'image/object/3d_bbox/length': dataset_util.float_list_feature(
            annotations['3d_bbox_length']),
        'image/object/3d_bbox/x': dataset_util.float_list_feature(
            annotations['3d_bbox_x']),
        'image/object/3d_bbox/y': dataset_util.float_list_feature(
            annotations['3d_bbox_y']),
        'image/object/3d_bbox/z': dataset_util.float_list_feature(
            annotations['3d_bbox_z']),
        'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
            annotations['3d_bbox_rot_y']),
    }))

    return example

def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False):
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    for obj in data['object']:
        difficult_obj.append(int(0))
        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(int(0))
        poses.append('Unspecified'.encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example

def prepare_example(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

    :param image_path: full path to the image
    :param annotations: a list object obtained by reading the annotation csv file
    :param label_map_dict: a map from string label names to integer ids.
    :return: example: The converted tf.Example.
    """
    print("encoding %s" % image_path)
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    if image.format != 'PNG':
        raise ValueError('Image format error')
    key = hashlib.sha256(encoded_png).hexdigest()

    # obtain attributes
    width, height = image.size
    img_filename = image_path.split('/')[-1]

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    occlud = []

    xmin.append(int(annotations[2]) / width)
    ymin.append(int(annotations[3]) / height)
    xmax.append(int(annotations[4]) / width)
    ymax.append(int(annotations[5]) / height)
    class_name = annotations[1]
    classes_text.append(class_name)
    classes.append(label_map_dict[class_name])
    classes_text = [class_text.encode('utf-8') for class_text in classes_text]
    trun, occ = annotations[6].split(',')
    truncated.append(int(trun))
    occlud.append(int(occ))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(img_filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(img_filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.int64_list_feature(occlud),
    }))
    return example

def dict_to_tf_example(filename,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      filename: base name (without extension) of the image inside
        image_subdirectory.
      mask_path: String path to PNG encoded mask.
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: String specifying subdirectory within the Pascal
        dataset directory holding the actual image data.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset (default: False).
      mask_type: 'numerical' or 'png'. 'png' is recommended because it leads
        to smaller file sizes.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by filename is not a valid JPEG
    """
    img_path = os.path.join(image_subdirectory, filename + '.jpg')
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    mask_cv = cv2.imread(mask_path)
    height, width, channels = mask_cv.shape
    '''
    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_mask_png)
    mask = PIL.Image.open(encoded_png_io)
    # print("mask pixel", mask[400][200])
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')
    print("img_path:", img_path)
    print("mask_path:", mask_path)
    mask_np = np.asarray(mask)
    width = int(data['size']['width'])
    height = int(data['size']['height'])
    '''

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    # masks = []
    print('-filename', filename)
    bndboxes, masks, buttons_list = analize_mask(mask_cv)
    print('--bndboxes length is {}, masks length is {} and buttons_list '
          'length is {}'.format(len(bndboxes), len(masks), len(buttons_list)))
    for index, bndbox in enumerate(bndboxes):
        xmin, xmax, ymin, ymax = bndbox
        difficult = bool(0)
        if ignore_difficult_instances and difficult:
            continue
        difficult_obj.append(int(difficult))
        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)
        # classes_text.append(obj['name'].encode('utf8'))
        # classes.append(label_map_dict[obj['name']])
        # print("label_map_dict[obj['name']]", label_map_dict[obj['name']])
        class_name = buttons_list[index]
        print('---box ' + str(index + 1) + ' is class_name ' + class_name +
              ' with label_map_dict[class_name] # ' +
              str(label_map_dict[class_name]) + ' xmin: ' + str(xmin) +
              ', ymin: ' + str(ymin) + ', xmax: ' + str(xmax) +
              ', ymax: ' + str(ymax))
        count_element(class_name)
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])
        truncated.append(0)
        poses.append('Unspecified'.encode('utf8'))
        # mask_remapped = (mask_np != 2).astype(np.uint8)
        # masks.append(mask_remapped)

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }
    if mask_type == 'png':
        encoded_mask_png_list = []
        for mask in masks:
            img = PIL.Image.fromarray(mask)
            output = io.BytesIO()
            img.save(output, format='PNG')
            encoded_mask_png_list.append(output.getvalue())
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example

def _create_tfexample(label_map_dict,
                      image_id, encoded_image, encoded_next_image,
                      depth, next_depth, flow, segmentation,
                      extrinsics_dict, next_extrinsics_dict,
                      tracking_rows, next_tracking_rows,
                      segmentation_color_map, first_extrinsics_dict):
    frame_id = int(image_id.split('_')[1])
    assert frame_id == extrinsics_dict['frame'] == next_extrinsics_dict['frame'] - 1
    next_tracking_row_map = {row['tid']: row for row in next_tracking_rows}
    height, width = depth.shape[:2]

    extrinsics = np.reshape(
        np.array(list(extrinsics_dict.values())[1:], dtype=np.float32), [4, 4])
    next_extrinsics = np.reshape(
        np.array(list(next_extrinsics_dict.values())[1:], dtype=np.float32),
        [4, 4])
    first_extrinsics = np.reshape(
        np.array(list(first_extrinsics_dict.values())[1:], dtype=np.float32),
        [4, 4])
    camera_moving = not np.allclose(extrinsics, next_extrinsics)
    q_cam1 = q_from_rotation_matrix(extrinsics[:3, :3])
    q_cam2 = q_from_rotation_matrix(next_extrinsics[:3, :3])
    trans_cam1 = extrinsics[:3, 3]
    trans_cam2 = next_extrinsics[:3, 3]
    q_cam1_to_cam2 = q_difference(q_cam1, q_cam2)
    q_cam2_to_cam1 = q_conjugate(q_cam1_to_cam2)
    trans_cam1_to_cam2 = trans_cam2 - q_rotate(q_cam1_to_cam2, trans_cam1)
    trans_cam2_to_cam1 = trans_cam1 - q_rotate(q_cam2_to_cam1, trans_cam2)
    print(q_cam1_to_cam2)
    camera_motion = np.concatenate([
        q_cam1_to_cam2, trans_cam1_to_cam2,
        np.array([camera_moving], dtype=np.float32)
    ])

    boxes = []
    masks = []
    classes = []
    motions = []
    diff = 0
    for row in tracking_rows:
        next_row = next_tracking_row_map.get(row['tid'])
        label = row['orig_label']
        tid = row['tid']
        # ensure object still tracked in next frame and visible in original
        # frame
        if next_row is not None and row['occupr'] > 0.1:
            assert frame_id == row['frame'] == next_row['frame'] - 1
            box = np.array([row['t'], row['l'], row['b'], row['r']],
                           dtype=np.float64)
            boxes.append(box)
            class_id = label_map_dict[label.lower()]
            classes.append(class_id)
            # find out which color this object corresponds to in the
            # segmentation image
            seg_r, seg_g, seg_b = segmentation_color_map['{}:{}'.format(label, tid)]
            mask = ((segmentation[:, :, 0] == seg_r).astype(np.uint8) +
                    (segmentation[:, :, 1] == seg_g).astype(np.uint8) +
                    (segmentation[:, :, 2] == seg_b).astype(np.uint8))
            mask = (mask == 3).astype(np.uint8)
            masks.append(mask)
            moving = int(row['moving'])
            p1 = _get_pivot(row)
            p2 = _get_pivot(next_row)
            q1 = _get_q(row)
            q2 = _get_q(next_row)
            q = q_multiply(q2, q_multiply(q_cam1_to_cam2, q_conjugate(q1)))
            p2_cam1 = q_rotate(q_cam2_to_cam1, p2) + trans_cam2_to_cam1
            trans = p2_cam1 - q_rotate(q, p1)
            if moving == 0:
                q = np.array([1, 0, 0, 0], dtype=np.float32)
                trans = np.array([0, 0, 0], dtype=np.float32)
            mv = np.array([moving], dtype=np.float32)
            motion = np.concatenate([q, trans, p1, mv])
            diff += np.sum(
                np.abs(q_rotate(q_cam1_to_cam2, q_rotate(q, p1) + trans) +
                       trans_cam1_to_cam2 - p2))
            motions.append(motion)
    print(diff)

    if len(boxes) > 0:
        boxes = np.stack(boxes, axis=0)
        masks = np.stack(masks, axis=0)
        motions = np.stack(motions, axis=0)
    else:
        boxes = np.zeros((0, 5), dtype=np.float32)
        masks = np.zeros((0, height, width), dtype=np.float32)
        motions = np.zeros((0, 15), dtype=np.float32)
    num_instances = boxes.shape[0]
    ymins = (boxes[:, 0] / height).tolist()
    xmins = (boxes[:, 1] / width).tolist()
    ymaxs = (boxes[:, 2] / height).tolist()
    xmaxs = (boxes[:, 3] / width).tolist()
    index_0, index_1, index_2 = np.nonzero(masks)
    key = hashlib.sha256(encoded_image).hexdigest()
    camera_intrinsics = np.array([725.0, 620.5, 187.0], dtype=np.float32)

    if FLAGS.gt_rigid_flow_from_motion:
        example_flow = dense_flow_from_motion(np.expand_dims(depth, 2),
                                              motions, masks, camera_motion,
                                              camera_intrinsics)
    else:
        example_flow = flow

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(image_id.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(image_id.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'next_image/encoded': dataset_util.bytes_feature(encoded_next_image),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/motion': dataset_util.float_list_feature(
            motions.ravel().tolist()),
        'image/segmentation/object/count': dataset_util.int64_feature(num_instances),
        'image/segmentation/object/index_0': dataset_util.int64_list_feature(
            index_0.tolist()),
        'image/segmentation/object/index_1': dataset_util.int64_list_feature(
            index_1.tolist()),
        'image/segmentation/object/index_2': dataset_util.int64_list_feature(
            index_2.tolist()),
        'image/segmentation/object/class': dataset_util.int64_list_feature(classes),
        'image/depth': dataset_util.float_list_feature(depth.ravel().tolist()),
        'next_image/depth': dataset_util.float_list_feature(
            next_depth.ravel().tolist()),
        'image/flow': dataset_util.float_list_feature(
            example_flow.ravel().tolist()),
        'image/camera/motion': dataset_util.float_list_feature(
            camera_motion.tolist()),
        'image/camera/intrinsics': dataset_util.float_list_feature(
            camera_intrinsics.tolist())
    }))
    return example, num_instances

def dict_to_tf_example(labels_corners, labels_center, labels_data, params,
                       label_map_dict, image_dir, image_dir_beliefs,
                       image_prefix, image_prev_prefix):
    width = round(params['pointcloud_grid_map_interface']['grids']['cartesian']
                  ['range']['y'] /
                  params['pointcloud_grid_map_interface']['grids']['cartesian']
                  ['resolution']['y'])
    height = round(params['pointcloud_grid_map_interface']['grids']['cartesian']
                   ['range']['x'] /
                   params['pointcloud_grid_map_interface']['grids']['cartesian']
                   ['resolution']['x'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    x_c = []
    y_c = []
    w = []
    h = []
    angle = []
    sin_angle = []
    cos_angle = []
    classes = []
    classes_text = []

    for idx, label_corner in enumerate(labels_corners):
        xmin.append(min(label_corner[0]) / width)
        ymin.append(min(label_corner[1]) / height)
        xmax.append(max(label_corner[0]) / width)
        ymax.append(max(label_corner[1]) / height)
        x_min = min(label_corner[0]) / width
        y_min = min(label_corner[1]) / height
        x_max = max(label_corner[0]) / width
        y_max = max(label_corner[1]) / height
        if (x_min >= 1) or (y_min >= 1) or (x_max >= 1) or (y_max >= 1):
            print(x_min, y_min, x_max, y_max)
            raise ValueError('Box parameters greater than 1.0')
        if (x_min <= 0) or (y_min <= 0) or (x_max <= 0) or (y_max <= 0):
            raise ValueError('Box parameters less than 0.0')
        x_c.append(labels_center[idx][0])
        y_c.append(labels_center[idx][1])
        angle_rad = _flipAngle(labels_data[idx].rz)
        angle.append(angle_rad)
        sin_angle.append(math.sin(2 * angle_rad))
        cos_angle.append(math.cos(2 * angle_rad))
        vec_s_x = math.cos(angle_rad)
        vec_s_y = math.sin(angle_rad)
        w_p = labels_data[idx].w / params['pointcloud_grid_map_interface'][
            'grids']['cartesian']['resolution']['y']
        w_p_s = w_p * math.sqrt(vec_s_x * vec_s_x / (width * width) +
                                vec_s_y * vec_s_y / (height * height))
        w.append(w_p_s)
        l_p = labels_data[idx].l / params['pointcloud_grid_map_interface'][
            'grids']['cartesian']['resolution']['x']
        l_p_s = l_p * math.sqrt(vec_s_x * vec_s_x / (height * height) +
                                vec_s_y * vec_s_y / (width * width))
        h.append(l_p_s)
        class_name = labels_data[idx].type
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    return tf.train.Example(features=tf.train.Features(feature={
        'id': dataset_util.bytes_feature(image_prefix.encode('utf8')),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'layers/height': dataset_util.int64_feature(height),
        'layers/width': dataset_util.int64_feature(width),
        'layers/detections/encoded': dataset_util.bytes_feature(
            _readImage(image_dir, image_prefix, 'detections_cartesian')),
        'layers/observations/encoded': dataset_util.bytes_feature(
            _readImage(image_dir, image_prefix, 'observations_cartesian')),
        'layers/decay_rate/encoded': dataset_util.bytes_feature(
            _readImage(image_dir, image_prefix, 'decay_rate_cartesian')),
        'layers/intensity/encoded': dataset_util.bytes_feature(
            _readImage(image_dir, image_prefix, 'intensity_cartesian')),
        'layers/zmin/encoded': dataset_util.bytes_feature(
            _readImage(image_dir, image_prefix, 'z_min_detections_cartesian')),
        'layers/zmax/encoded': dataset_util.bytes_feature(
            _readImage(image_dir, image_prefix, 'z_max_detections_cartesian')),
        'layers/occlusions/encoded': dataset_util.bytes_feature(
            _readImage(image_dir, image_prefix, 'z_max_occlusions_cartesian')),
        'layers/bel_O_FUSED/encoded': dataset_util.bytes_feature(
            _readImage(image_dir_beliefs, image_prefix, 'bel_O_FUSED_cartesian')),
        'layers/bel_F_FUSED/encoded': dataset_util.bytes_feature(
            _readImage(image_dir_beliefs, image_prefix, 'bel_F_FUSED_cartesian')),
        'layers/bel_U_FUSED/encoded': dataset_util.bytes_feature(
            _readImage(image_dir_beliefs, image_prefix, 'bel_U_FUSED_cartesian')),
        'layers/detections_drivingCorridor_FUSED/encoded': dataset_util.bytes_feature(
            _readImage(image_dir_beliefs, image_prefix,
                       'detections_drivingCorridor_FUSED_cartesian')),
        'layers/z_max_detections_FUSED/encoded': dataset_util.bytes_feature(
            _readImage(image_dir_beliefs, image_prefix,
                       'z_max_detections_FUSED_cartesian')),
        'layers/z_min_detections_FUSED/encoded': dataset_util.bytes_feature(
            _readImage(image_dir_beliefs, image_prefix,
                       'z_min_detections_FUSED_cartesian')),
        'layers/observations_z_min_FUSED/encoded': dataset_util.bytes_feature(
            _readImage(image_dir_beliefs, image_prefix,
                       'observations_z_min_FUSED_cartesian')),
        # 'layers_prev/detections/encoded': dataset_util.bytes_feature(
        #     _readImage(image_dir, image_prev_prefix, 'detections_cartesian')),
        # 'layers_prev/observations/encoded': dataset_util.bytes_feature(
        #     _readImage(image_dir, image_prev_prefix, 'observations_cartesian')),
        # 'layers_prev/decay_rate/encoded': dataset_util.bytes_feature(
        #     _readImage(image_dir, image_prev_prefix, 'decay_rate_cartesian')),
        # 'layers_prev/intensity/encoded': dataset_util.bytes_feature(
        #     _readImage(image_dir, image_prev_prefix, 'intensity_cartesian')),
        # 'layers_prev/zmin/encoded': dataset_util.bytes_feature(
        #     _readImage(image_dir, image_prev_prefix, 'z_min_detections_cartesian')),
        # 'layers_prev/zmax/encoded': dataset_util.bytes_feature(
        #     _readImage(image_dir, image_prev_prefix, 'z_max_detections_cartesian')),
        # 'layers_prev/occlusions/encoded': dataset_util.bytes_feature(
        #     _readImage(image_dir, image_prev_prefix, 'z_max_occlusions_cartesian')),
        'boxes/aligned/x_min': dataset_util.float_list_feature(xmin),
        'boxes/aligned/x_max': dataset_util.float_list_feature(xmax),
        'boxes/aligned/y_min': dataset_util.float_list_feature(ymin),
        'boxes/aligned/y_max': dataset_util.float_list_feature(ymax),
        'boxes/inclined/x_c': dataset_util.float_list_feature(x_c),
        'boxes/inclined/y_c': dataset_util.float_list_feature(y_c),
        'boxes/inclined/w': dataset_util.float_list_feature(w),
        'boxes/inclined/h': dataset_util.float_list_feature(h),
        'boxes/inclined/angle': dataset_util.float_list_feature(angle),
        'boxes/inclined/sin_angle': dataset_util.float_list_feature(sin_angle),
        'boxes/inclined/cos_angle': dataset_util.float_list_feature(cos_angle),
        'boxes/class/text': dataset_util.bytes_list_feature(classes_text),
        'boxes/class/label': dataset_util.int64_list_feature(classes),
    }))

def create_tf_example(group, path):
    with tf.gfile.GFile(
            os.path.join(path, '{}'.format(group.filename.split("/")[-1])),
            'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['Upper left corner X'] / width)
        xmaxs.append(row['Lower right corner X'] / width)
        ymins.append(row['Upper left corner Y'] / height)
        ymaxs.append(row['Lower right corner Y'] / height)
        lightColor = ""
        if "go" in row['Annotation tag']:
            lightColor = "Green"
        if "stop" in row['Annotation tag']:
            lightColor = "Red"
        if "warning" in row['Annotation tag']:
            lightColor = "Yellow"
        classes_text.append(lightColor.encode())
        classes.append(int(LABEL_DICT[lightColor]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

# (Fragment: per-box bookkeeping and Example construction from a larger
# converter; `embed()` is an IPython breakpoint left in the original code.)
embed()
xmins.append(xmin / width)
xmaxs.append(xmax / width)
ymins.append(ymin / height)
ymaxs.append(ymax / height)
classes_text.append(tf.compat.as_bytes(text))
classes.append(index)
# cv2.rectangle(_img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
# cv2.putText(_img, text, (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
#             (255, 255, 255), lineType=cv2.LINE_AA)
# _path = os.path.join(out_path,
#                      os.path.splitext(data[key]["filename"])[0] + "_ann" +
#                      os.path.splitext(data[key]["filename"])[1])
# cv2.imwrite(_path, _img)

tf_example = tf.train.Example(features=tf.train.Features(feature={
    'image/height': dataset_util.int64_feature(height),
    'image/width': dataset_util.int64_feature(width),
    'image/filename': dataset_util.bytes_feature(filename),
    'image/source_id': dataset_util.bytes_feature(filename),
    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
    'image/format': dataset_util.bytes_feature(image_format),
    'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
    'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
    # (The fragment was cut off after the ymin entry; the remaining entries
    # below follow the same pattern as the other converters in this
    # collection.)
    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
    'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
    'image/object/class/label': dataset_util.int64_list_feature(classes),
}))

def create_tf_record(self,
                     has_additional_channels=False,
                     num_shards=1,
                     num_examples_per_shard=1):

    def dummy_jpeg_fn():
        image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
        additional_channels_tensor = np.random.randint(
            255, size=(4, 5, 1)).astype(np.uint8)
        encoded_jpeg = tf.image.encode_jpeg(image_tensor)
        encoded_additional_channels_jpeg = tf.image.encode_jpeg(
            additional_channels_tensor)
        return encoded_jpeg, encoded_additional_channels_jpeg

    encoded_jpeg, encoded_additional_channels_jpeg = self.execute(
        dummy_jpeg_fn, [])

    tmp_dir = self.get_temp_dir()
    flat_mask = (4 * 5) * [1.0]

    for i in range(num_shards):
        path = os.path.join(tmp_dir, '%05d.tfrecord' % i)
        writer = tf.python_io.TFRecordWriter(path)

        for j in range(num_examples_per_shard):
            if num_shards > 1:
                source_id = (str(i) + '_' + str(j)).encode()
            else:
                source_id = str(j).encode()

            features = {
                'image/source_id': dataset_util.bytes_feature(source_id),
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/height': dataset_util.int64_feature(4),
                'image/width': dataset_util.int64_feature(5),
                'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
                'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
                'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
                'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
                'image/object/class/label': dataset_util.int64_list_feature([2]),
                'image/object/mask': dataset_util.float_list_feature(flat_mask),
            }

            if has_additional_channels:
                additional_channels_key = 'image/additional_channels/encoded'
                features[additional_channels_key] = dataset_util.bytes_list_feature(
                    [encoded_additional_channels_jpeg] * 2)

            example = tf.train.Example(features=tf.train.Features(feature=features))
            writer.write(example.SerializeToString())
        writer.close()

    return os.path.join(self.get_temp_dir(), '?????.tfrecord')

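# Hedged sketch of consuming the sharded glob pattern returned by
# create_tf_record above with the TF 1.x tf.data API; the parse step is
# omitted and cycle_length=4 is an arbitrary choice.
import tensorflow as tf


def read_records(file_pattern):
    files = tf.data.Dataset.list_files(file_pattern)
    return files.interleave(tf.data.TFRecordDataset, cycle_length=4)


# dataset = read_records(self.create_tf_record(num_shards=2))
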
def create_tf_example(example, mode):
    # Some images referenced in the xml aren't in the dataset
    try:
        activ_image = Image.open(join(example['path_to_image'],
                                      example['file_name']), mode='r')
    except IOError:
        print("Could not find {0}; skipping".format(
            join(example['path_to_image'], example['file_name'])))
        return None

    if USE_GRAYSCALE:
        activ_image = activ_image.convert('L').convert('RGB')

    # Normalized x,y coordinates
    width, height = activ_image.size
    xmins = [x / float(width) for x in example['bbox_xmins']]
    xmaxs = [x / float(width) for x in example['bbox_xmaxs']]
    ymins = [y / float(height) for y in example['bbox_ymins']]
    ymaxs = [y / float(height) for y in example['bbox_ymaxs']]

    # Skip the image if it doesn't match INPUT_WIDTH x INPUT_HEIGHT
    if mode != "test" and ONE_IMAGE_SIZE and (height != INPUT_HEIGHT or
                                              width != INPUT_WIDTH):
        # print("Input image does not match expected size {0}x{1}; "
        #       "skipping".format(INPUT_WIDTH, INPUT_HEIGHT))
        return None

    # If needed, resize now that the normalized box coordinates have been
    # calculated
    if width > 1000 or height > 1000:
        basewidth = 1000
        wpercent = (basewidth / float(width))
        hsize = int((float(height) * float(wpercent)))
        activ_image = activ_image.resize((basewidth, hsize), Image.ANTIALIAS)
        width, height = activ_image.size

    imgByteArr = io.BytesIO()
    if example['extension'] in ['jpg', 'jpeg']:
        activ_image.save(imgByteArr, format='JPEG')
    else:
        activ_image.save(imgByteArr, format='PNG')
    encoded_image_data = imgByteArr.getvalue()  # Encoded image bytes

    # Filename of the image. Empty if image is not from file
    filename = example['file_name'].encode('utf-8')
    image_format = example['image_format']  # b'jpeg' or b'png'

    # List of string class name of bounding box (1 per box)
    classes_text = [example['label'] for i in range(len(xmins))]
    # List of integer class id of bounding box (1 per box)
    classes = [example['label_num'] for i in range(len(xmins))]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

def create_tf_record(data, label_map_dict, is_yaml=False,
                     ignore_difficult_instances=False):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      :param data: dict holding (XML or YAML) fields for a single image
        (obtained by running dataset_util.recursive_parse_xml_to_dict)
      :param label_map_dict: A map from string label names to integers ids.
      :param ignore_difficult_instances: Whether to skip difficult instances
        in the dataset (default: False).

    Returns:
      :return tf_example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG
    """
    with tf.gfile.GFile(data['path'], 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    if is_yaml:
        width = int(data['width'])
        height = int(data['height'])
        filename = data['path'].encode('utf8')
        for box in data['boxes']:
            difficult_obj.append(0)
            xmin.append(float(box['x_min']) / width)
            ymin.append(float(box['y_min']) / height)
            xmax.append(float(box['x_max']) / width)
            ymax.append(float(box['y_max']) / height)
            classes_text.append(box['label'].encode('utf8'))
            classes.append(label_map_dict[box['label']])
            truncated.append(0)
            poses.append(r'Unspecified'.encode('utf8'))
    else:
        width = int(data['size']['width'])
        height = int(data['size']['height'])
        filename = data['filename'].encode('utf8')
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue
            difficult_obj.append(int(difficult))
            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(r'jpg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return tf_example

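# Minimal driver sketch exercising the YAML branch of create_tf_record
# above. It assumes the YAML file deserializes to a list of per-image dicts
# carrying the 'path', 'width', 'height' and 'boxes' keys that the function
# reads; the paths are placeholders, not part of the original snippet.
import tensorflow as tf
import yaml


def write_yaml_records(yaml_path, label_map_dict, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    with open(yaml_path, 'r') as f:
        images = yaml.safe_load(f)
    for image in images:
        writer.write(create_tf_record(image, label_map_dict,
                                      is_yaml=True).SerializeToString())
    writer.close()
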
def main(_):
    xml_file = FLAGS.xml
    logging.info('Reading from DLIB %s dataset.', xml_file)

    # find all labels in xml
    xml_data = xml.etree.ElementTree.parse(xml_file).getroot()
    labels = find_all_labels_recursive(xml_data)
    labels = sorted(labels)
    print(labels)

    # create pbtxt
    label_map_filename = os.path.splitext(FLAGS.output_path)[0] + '.pbtxt'
    with open(label_map_filename, 'w') as f:
        for idx, lbl in enumerate(labels):
            f.write("item {{\n id: {}\n name: '{}'\n}}\n\n".format(idx + 1, lbl))

    # create TF record file
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    xml_file_dir = os.path.dirname(xml_file)
    print('XML file directory is {}.'.format(xml_file_dir))
    for img_node in xml_data.find('images'):
        # get image path
        img_filename = img_node.attrib['file']
        if os.path.isabs(img_filename):
            img_path = img_filename
        else:
            img_path = os.path.join(xml_file_dir, img_filename)

        # read image
        with tf.gfile.GFile(img_path, 'rb') as fid:
            encoded_jpg = fid.read()
        encoded_jpg_io = io.BytesIO(encoded_jpg)
        image = PIL.Image.open(encoded_jpg_io)
        if image.format != 'JPEG':
            raise ValueError('Image format not JPEG')
        key = hashlib.sha256(encoded_jpg).hexdigest()
        width, height = image.size
        print("Image {}: {}x{}".format(os.path.basename(img_filename),
                                       width, height))

        xmin = []
        ymin = []
        xmax = []
        ymax = []
        classes = []
        classes_text = []
        truncated = []
        difficult_obj = []

        # read annotations
        for annot_node in img_node:
            label_node = annot_node.find('label')
            if label_node is not None:
                label = label_node.text
            else:
                # Skip the box entirely; without the `continue` the code
                # below would reference an undefined label.
                print("Skipping annotation because label is empty")
                continue
            bbox = dict(
                ymin=int(annot_node.attrib['top']),
                xmin=int(annot_node.attrib['left']),
                xmax=int(annot_node.attrib['left']) + int(annot_node.attrib['width']),
                ymax=int(annot_node.attrib['top']) + int(annot_node.attrib['height']))
            is_truncated = (bbox['xmin'] < 0 or bbox['xmax'] >= width or
                            bbox['ymin'] < 0 or bbox['ymax'] >= height)
            is_difficult = False

            xmin.append(float(bbox['xmin']) / width)
            ymin.append(float(bbox['ymin']) / height)
            xmax.append(float(bbox['xmax']) / width)
            ymax.append(float(bbox['ymax']) / height)
            classes_text.append(label.encode('utf8'))
            classes.append(labels.index(label) + 1)
            truncated.append(int(is_truncated))
            difficult_obj.append(int(is_difficult))

        if len(classes) > 0:
            example = tf.train.Example(features=tf.train.Features(feature={
                'image/height': dataset_util.int64_feature(height),
                'image/width': dataset_util.int64_feature(width),
                'image/filename': dataset_util.bytes_feature(img_filename.encode('utf8')),
                'image/source_id': dataset_util.bytes_feature(img_filename.encode('utf8')),
                'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
                'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
                'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
                'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
                'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label': dataset_util.int64_list_feature(classes),
                'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                'image/object/truncated': dataset_util.int64_list_feature(truncated),
                # 'image/object/view': dataset_util.bytes_list_feature(poses),
            }))
            writer.write(example.SerializeToString())

    # print(data)
    # examples_list = dataset_util.read_examples_list(examples_path)
    # for idx, example in enumerate(examples_list):
    #     if idx % 100 == 0:
    #         logging.info('On image %d of %d', idx, len(examples_list))
    #     path = os.path.join(annotations_dir, example + '.xml')
    #     with tf.gfile.GFile(path, 'r') as fid:
    #         xml_str = fid.read()
    #     xml = etree.fromstring(xml_str)
    #     data = dataset_util.recursive_parse_xml_to_dict(xml)
    #     tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
    #                                     FLAGS.ignore_difficult_instances)
    #     writer.write(tf_example.SerializeToString())

    writer.close()

def dict_to_tf_example(data, label_map_dict):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      label_map_dict: A map from string label names to integers ids.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG
    """
    encoded_jpg_io = io.BytesIO()
    image = data['image']
    image.save(encoded_jpg_io, "JPEG", quality=80)
    encoded_jpg = encoded_jpg_io.getvalue()
    key = hashlib.sha256(encoded_jpg).hexdigest()
    width, height = image.size

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    rotation = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    masks = []
    difficult_obj = []
    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        difficult_obj.append(int(difficult))

        xmin.append(float(obj['bndbox']['xmin']) / width)
        ymin.append(float(obj['bndbox']['ymin']) / height)
        xmax.append(float(obj['bndbox']['xmax']) / width)
        ymax.append(float(obj['bndbox']['ymax']) / height)
        rotation.append(float(obj['rotation']))
        masks.append(obj['mask'])
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])
        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    mask = np.stack(masks)
    encoded_mask = pn_encode(mask.flatten()).tolist()
    mask_length = len(encoded_mask)
    print('mask encode:', mask.shape, '->', len(encoded_mask))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/rotation': dataset_util.float_list_feature(rotation),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
        'image/segmentation/object': dataset_util.int64_list_feature(encoded_mask),
        'image/segmentation/length': dataset_util.int64_feature(mask_length),
        'image/segmentation/object/class': dataset_util.int64_list_feature(classes),
    }))
    return example

def create_tf_example(image, annotations_list, image_dir, category_index,
                      include_masks=True):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    annotations_list: list of dicts with keys: [u'segmentation', u'area',
      u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that
      bounding box coordinates in the official COCO dataset are given as
      [x, y, width, height] tuples using absolute coordinates where x, y
      represent the top-left (0-indexed) corner. This function converts to
      the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed by the
      'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentation masks (PNG
      encoded) in the result. default: True.

  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were
      ignored.

  Raises:
    ValueError: if the image pointed to by image['file_name'] is not a valid
      JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate or out-of-bounds boxes.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())

  feature_dict = {
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          str(image_id).encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(
          category_names),
      # Store the integer labels alongside the human-readable names.
      'image/object/class/label': dataset_util.int64_list_feature(
          category_ids),
      'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
      'image/object/area': dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
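# A minimal driver sketch for the COCO converter above, assuming a standard
# instances_*.json annotations file; the grouping helper and function name
# below are illustrative, not part of the original code.
import collections
import json
import tensorflow as tf
from object_detection.utils import label_map_util

def convert_coco(annotations_file, image_dir, output_path):
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    groundtruth = json.load(fid)
  category_index = label_map_util.create_category_index(
      groundtruth['categories'])
  # Group annotations by image id so each image is visited exactly once.
  anns_by_image = collections.defaultdict(list)
  for ann in groundtruth['annotations']:
    anns_by_image[ann['image_id']].append(ann)
  writer = tf.python_io.TFRecordWriter(output_path)
  skipped = 0
  for image in groundtruth['images']:
    _, example, num_skipped = create_tf_example(
        image, anns_by_image[image['id']], image_dir, category_index)
    skipped += num_skipped
    writer.write(example.SerializeToString())
  writer.close()
  print('Skipped %d annotations.' % skipped)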
def dict_to_tf_example(data, dataset_directory, set_name, label_map_dict, ignore_difficult_instances=False): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) dataset_directory: Path to root directory holding PASCAL dataset set_name: name of the set training, validation or test label_map_dict: A map from string label names to integers ids. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(set_name, data['filename']) full_path = os.path.join(dataset_directory, img_path) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] if 'object' not in data: data['object'] = [] for obj in data['object']: if obj['name'] in label_map_dict: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), })) return example
def dict_to_tf_example(data, dataset_directory, label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding COWFACE XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding the dataset
    label_map_dict: A map from string label names to integer ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    image_subdirectory: String specifying subdirectory within the dataset
      directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid
      JPEG

  Example:
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    where data looks like:
    {'source': {'database': 'Unknown'},
     'object': [{'bndbox': {'xmin': '115', 'ymin': '5',
                            'ymax': '124', 'xmax': '261'},
                 'difficult': '0', 'pose': 'Unspecified',
                 'name': 'dog', 'truncated': '0'}],
     'filename': '1.jpg', 'segmented': '0',
     'path': '/home/liuhy/Downloads/dog/1.jpg', 'folder': 'dog',
     'size': {'width': '323', 'depth': '3', 'height': '156'}}
  """
  img_path = os.path.join(data['folder'], image_subdirectory,
                          data['filename'])  # dog/JPEGImages/1.jpg
  full_path = os.path.join(dataset_directory, img_path)  # FLAGS.data_dir/dog/JPEGImages/1.jpg
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  if 'object' in data:
    for obj in data['object']:
      difficult = bool(int(obj['difficult']))
      if ignore_difficult_instances and difficult:
        continue
      difficult_obj.append(int(difficult))
      xmin.append(float(obj['bndbox']['xmin']) / width)
      ymin.append(float(obj['bndbox']['ymin']) / height)
      xmax.append(float(obj['bndbox']['xmax']) / width)
      ymax.append(float(obj['bndbox']['ymax']) / height)
      classes_text.append(obj['name'].encode('utf8'))
      classes.append(label_map_dict[obj['name']])
      truncated.append(int(obj['truncated']))
      poses.append(obj['pose'].encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return example
def create_tf_example(example: list): """ Creates a tf.train.Example object from an image and its labels which can be used in the training pipeline for the object detector. Args: example: list containing information about the image and its labels. Returns: information of example parsed into a tf.train.Example object """ width = int(example[0]) height = int(example[1]) filename = str.encode(example[2]) with tf.gfile.GFile(example[3], 'rb') as f: encoded_image_data = bytes(f.read()) image_format = b'jpg' boxes = example[5] xmins = [] ymins = [] xmaxs = [] ymaxs = [] for b in boxes: xmins.append(b[0]) ymins.append(b[1]) xmaxs.append(b[2]) ymaxs.append(b[3]) xmins = [x / width for x in xmins] xmaxs = [x / width for x in xmaxs] ymins = [y / height for y in ymins] ymaxs = [y / height for y in ymaxs] class_reader = ClassReader(known_class_ids_annotation_predictor) classes_text = example[4][:] classes = [] none_vals = [] for i, cls in enumerate(classes_text): if cls is None: none_vals.append(i) for index in sorted(none_vals, reverse=True): classes_text.pop(index) xmins.pop(index) ymins.pop(index) xmaxs.pop(index) ymaxs.pop(index) for i, cls in enumerate(classes_text): classes.append(class_reader.get_index_of_class_from_label(cls)) class_encoded = str.encode(cls) classes_text[i] = class_encoded tf_example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_image_data), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
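# The None-class filtering above pops from five parallel lists in lockstep.
# An equivalent, more compact formulation (illustrative only, for two of the
# lists) keeps each box paired with its label and drops unlabeled entries in
# a single pass.
def drop_unlabeled(boxes, labels):
  """Return (boxes, labels) with entries whose label is None removed."""
  kept = [(b, l) for b, l in zip(boxes, labels) if l is not None]
  if not kept:
    return [], []
  boxes, labels = zip(*kept)
  return list(boxes), list(labels)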
def dict_to_tf_example(data, mask_path, label_map_dict, image_subdirectory, ignore_difficult_instances=False, faces_only=True, mask_type='png'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) mask_path: String path to PNG encoded mask. label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). faces_only: If True, generates bounding boxes for pet faces. Otherwise generates bounding boxes (as well as segmentations for full pet bodies). mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to smaller file sizes. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() with tf.gfile.GFile(mask_path, 'rb') as fid: encoded_mask_png = fid.read() encoded_png_io = io.BytesIO(encoded_mask_png) mask = PIL.Image.open(encoded_png_io) if mask.format != 'PNG': raise ValueError('Mask format not PNG') mask_np = np.asarray(mask) nonbackground_indices_x = np.any(mask_np != 2, axis=0) nonbackground_indices_y = np.any(mask_np != 2, axis=1) nonzero_x_indices = np.where(nonbackground_indices_x) nonzero_y_indices = np.where(nonbackground_indices_y) width = int(data['size']['width']) height = int(data['size']['height']) xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] masks = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) if faces_only: xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) else: xmin = float(np.min(nonzero_x_indices)) xmax = float(np.max(nonzero_x_indices)) ymin = float(np.min(nonzero_y_indices)) ymax = float(np.max(nonzero_y_indices)) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) class_name = get_class_name_from_filename(data['filename']) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) if not faces_only: mask_remapped = (mask_np != 2).astype(np.uint8) masks.append(mask_remapped) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': 
dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } if not faces_only: if mask_type == 'numerical': mask_stack = np.stack(masks).astype(np.float32) masks_flattened = np.reshape(mask_stack, [-1]) feature_dict['image/object/mask'] = ( dataset_util.float_list_feature(masks_flattened.tolist())) elif mask_type == 'png': encoded_mask_png_list = [] for mask in masks: img = PIL.Image.fromarray(mask) output = io.BytesIO() img.save(output, format='PNG') encoded_mask_png_list.append(output.getvalue()) feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png_list)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example
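# Sketch of the mask round trip used above: Oxford-IIIT-pet-style trimap
# PNGs use pixel value 2 for background, so (mask != 2) yields a binary
# foreground mask, which is then re-encoded as a per-instance PNG.
import io
import numpy as np
import PIL.Image

def remap_and_encode(trimap_png_bytes):
  mask = PIL.Image.open(io.BytesIO(trimap_png_bytes))
  mask_np = np.asarray(mask)
  binary = (mask_np != 2).astype(np.uint8)  # 1 = foreground, 0 = background
  out = io.BytesIO()
  PIL.Image.fromarray(binary).save(out, format='PNG')
  return out.getvalue()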
def dict_to_tf_example(data, label_map_dict, image_subdirectory, ignore_difficult_instances=False): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] for obj in data['object']: xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) #class_name = get_class_name_from_filename(data['filename']) class_name = 'gazelle_head' classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return example
def dict_to_tf_example(data, dataset_directory, label_map_dict, ignore_difficult_instances=False, image_subdirectory='JPEGImages'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) dataset_directory: Path to root directory holding PASCAL dataset label_map_dict: A map from string label names to integers ids. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). image_subdirectory: String specifying subdirectory within the PASCAL dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(data['folder'], image_subdirectory, data['filename']) full_path = os.path.join(dataset_directory, img_path) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), })) return example
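# Typical driver for the PASCAL-style converter above (a sketch; the helper
# name and argument wiring are illustrative): parse the annotation XML with
# lxml, then feed the 'annotation' subtree to dict_to_tf_example.
from lxml import etree
import tensorflow as tf
from object_detection.utils import dataset_util, label_map_util

def xml_to_example(xml_path, data_dir, label_map_path):
  with tf.gfile.GFile(xml_path, 'r') as fid:
    xml = etree.fromstring(fid.read())
  data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
  label_map_dict = label_map_util.get_label_map_dict(label_map_path)
  return dict_to_tf_example(data, data_dir, label_map_dict)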
def dict_to_tf_example(data, label_map_dict, image_subdirectory, example,
                       ignore_difficult_instances=False):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying subdirectory within the Pascal
      dataset directory holding the actual image data.
    example: annotation file name; its stem is used to locate the .jpg image.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).

  Returns:
    tf_example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid
      JPEG
  """
  # Images live directly under image_subdirectory as <stem>.jpg; embedded
  # folder layouts (e.g. images/bw1/image1.jpg) would need the path logic
  # here adjusted.
  img_name = example.split(".")[0]
  img_path = os.path.join(image_subdirectory, img_name + '.jpg')
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  for obj in data['object']:
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue
    difficult_obj.append(int(difficult))
    xmin.append(float(obj['bndbox']['xmin']) / width)
    ymin.append(float(obj['bndbox']['ymin']) / height)
    xmax.append(float(obj['bndbox']['xmax']) / width)
    ymax.append(float(obj['bndbox']['ymax']) / height)
    # The class is the first token of the XML name (e.g. 'pack red' -> 'pack').
    class_name = obj['name'].split(' ')[0]
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return tf_example
def create_tf_example(image, annotations_list, image_dir, category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    annotations_list: list of dicts with keys: [u'segmentation', u'area',
      u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that
      bounding box coordinates in the official COCO dataset are given as
      [x, y, width, height] tuples using absolute coordinates where x, y
      represent the top-left (0-indexed) corner. This function converts to
      the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed by the
      'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentation masks (PNG
      encoded) in the result. default: False.

  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were
      ignored.

  Raises:
    ValueError: if the image pointed to by image['file_name'] is not a valid
      JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate or out-of-bounds boxes.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())

  feature_dict = {
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          str(image_id).encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      # Store the human-readable names collected above alongside the labels.
      'image/object/class/text': dataset_util.bytes_list_feature(
          category_names),
      'image/object/class/label': dataset_util.int64_list_feature(
          category_ids),
      'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
      'image/object/area': dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def create_tf_example(group, path, is_pevid: bool = False): """Creates a TF example given a dataframe containing annotation information and input image path. This is only used for TFExample creation from the PEViD-UHD dataset""" if is_pevid: # Get and add the volume and 'frames' to the path volume_name = group.filename.split('_frame')[0] path = os.path.join(path, volume_name, 'frames') with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = Image.open(encoded_jpg_io) width, height = image.size filename = group.filename.encode('utf8') image_format = b'jpg' xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] for index, row in group.object.iterrows(): xmins.append(row['xmin'] / width) xmaxs.append(row['xmax'] / width) ymins.append(row['ymin'] / height) ymaxs.append(row['ymax'] / height) classes_text.append(row['class'].encode('utf8')) classes.append(class_text_to_int(row['class'])) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
def create_tf_examples(writer, anns, path, file_name, width, height,
                       encoded_jpg):
  xmins, ymins = [], []
  xmaxs, ymaxs = [], []
  classes_text = []
  classes = []
  num_examples = 0
  for ann in anns:
    xmin = ann['bbox'][0]
    ymin = ann['bbox'][1]
    w = ann['bbox'][2]
    h = ann['bbox'][3]
    xmax = xmin + w
    ymax = ymin + h
    # Normalize to [0, 1].
    xmin /= width
    xmax /= width
    ymin /= height
    ymax /= height
    # Keep only boxes that lie fully inside the image (edges inclusive, so
    # boxes touching the right or bottom border are not dropped).
    if 0 <= xmin and 0 <= ymin and xmax <= 1 and ymax <= 1:
      xmins.append(xmin)
      xmaxs.append(xmax)
      ymins.append(ymin)
      ymaxs.append(ymax)
      classes_text.append('Text'.encode('utf8'))
      classes.append(1)
  filename = os.path.join(path, file_name)
  filename = filename.encode('utf8')
  image_format = b'jpg'
  if len(xmins) != 0:
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(
            classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
    num_examples += 1
  return num_examples
def create_tf_example(_filename, _encoded_image_data, _width, _height,
                      _metadata):
  height = _height  # Image height
  width = _width  # Image width
  # The bytes feature needs bytes; filenames may arrive as str.
  filename = (_filename if isinstance(_filename, bytes)
              else _filename.encode('utf8'))
  encoded_image_data = _encoded_image_data  # Encoded image bytes
  image_format = b'jpeg'  # or b'png'
  metadata = _metadata

  xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
  xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
  ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
  ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
  classes_text = []  # List of string class name of bounding box (1 per box)
  classes = []  # List of integer class id of bounding box (1 per box)

  number_entries = int(next(metadata))
  for i in range(0, number_entries):
    next_line = next(metadata).strip('\n')
    data = [float(entry) for entry in next_line.split(' ')[:-2]]
    centre_x = data[3]
    centre_y = data[4]
    # Project the ellipse radii onto the image axes to get box half-extents.
    r_major = data[0] * math.cos(data[2] * math.pi / 180)
    r_minor = data[1] * math.cos(data[2] * math.pi / 180)
    x = float(centre_x - r_minor)
    y = float(centre_y - r_major)
    w = float(r_minor * 2)
    h = float(r_major * 2)
    if (math.isnan(x) or math.isnan(y) or math.isnan(w) or math.isnan(h)
        or height == 0 or width == 0):
      raise ValueError('Invalid ellipse annotation or zero image dimension')
    xmins.append(x / float(width))
    xmaxs.append((x + w) / float(width))
    ymins.append(y / float(height))
    ymaxs.append((y + h) / float(height))
    classes_text.append('face'.encode('utf8'))
    classes.append(1)

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  return tf_example
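# The metadata lines above follow the FDDB ellipse convention
# (major_radius minor_radius angle center_x center_y ...). A standalone
# sketch of the same conversion the loop performs: shrink each radius by
# cos(angle) and treat the results as half-extents of an axis-aligned box.
import math

def ellipse_to_box(major_r, minor_r, angle_deg, cx, cy):
  """Approximate an FDDB ellipse with an axis-aligned (x, y, w, h) box."""
  r_major = major_r * math.cos(math.radians(angle_deg))
  r_minor = minor_r * math.cos(math.radians(angle_deg))
  x = cx - r_minor  # left edge (the minor axis spans the width)
  y = cy - r_major  # top edge (the major axis spans the height)
  return x, y, 2.0 * r_minor, 2.0 * r_major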
def dict_to_tf_example(data, mask_path, label_map_dict, image_subdirectory,
                       ignore_difficult_instances=False, mask_type='png'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying subdirectory within the Pascal
      dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads
      to smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid
      JPEG
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_mask_png)
  mask = PIL.Image.open(encoded_png_io)
  if mask.format != 'PNG':
    raise ValueError('Mask format not PNG')
  mask_np = np.asarray(mask)

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  masks = []
  if 'object' in data:
    for obj in data['object']:
      difficult = bool(int(obj['difficult']))
      if ignore_difficult_instances and difficult:
        continue
      difficult_obj.append(int(difficult))

      xmin = float(obj['bndbox']['xmin'])
      xmax = float(obj['bndbox']['xmax'])
      ymin = float(obj['bndbox']['ymin'])
      ymax = float(obj['bndbox']['ymax'])
      xmins.append(xmin / width)
      ymins.append(ymin / height)
      xmaxs.append(xmax / width)
      ymaxs.append(ymax / height)

      # The class comes from the filename (pet-dataset style), not from the
      # XML <name> field.
      class_name = get_class_name_from_filename(data['filename'])
      classes_text.append(class_name.encode('utf8'))
      classes.append(label_map_dict[class_name])
      truncated.append(int(obj['truncated']))
      poses.append(obj['pose'].encode('utf8'))
      # Pixel value 2 marks background in the trimap PNG.
      mask_remapped = (mask_np != 2).astype(np.uint8)
      masks.append(mask_remapped)

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }
  if mask_type == 'numerical':
    mask_stack = np.stack(masks).astype(np.float32)
    masks_flattened = np.reshape(mask_stack, [-1])
    feature_dict['image/object/mask'] = (
        dataset_util.float_list_feature(masks_flattened.tolist()))
  elif mask_type == 'png':
    encoded_mask_png_list = []
    for mask in masks:
      img = PIL.Image.fromarray(mask)
      output = io.BytesIO()
      img.save(output, format='PNG')
      encoded_mask_png_list.append(output.getvalue())
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png_list))

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
def make_shape(max_num, width, height, bg_color=True, filename=None,
               label_map_dict=None):
  if bg_color:
    # Random, roughly uniform background per channel. numpy arrays use
    # (rows, cols) = (height, width) ordering.
    r_s = np.random.randint(20, 160)
    r = np.random.randint(r_s, r_s + random.randint(10, 90),
                          width * height, np.uint8).reshape((height, width))
    g_s = np.random.randint(20, 160)
    g = np.random.randint(g_s, g_s + random.randint(10, 90),
                          width * height, np.uint8).reshape((height, width))
    b_s = np.random.randint(20, 160)
    b = np.random.randint(b_s, b_s + random.randint(10, 90),
                          width * height, np.uint8).reshape((height, width))
  else:
    r = np.zeros((height, width), np.uint8)
    g = np.zeros((height, width), np.uint8)
    b = np.zeros((height, width), np.uint8)
  # cv2.imwrite interprets channels as BGR; for random synthetic data the
  # channel swap is harmless.
  bg = cv2.merge([r, g, b])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  for i in range(max_num):
    label = random.randint(1, 2)
    x1, y1, x2, y2 = 0, 0, 0, 0
    if label == 1:
      shp = getCircleShape(bg)
      x1, y1, x2, y2 = (shp[0] - shp[2], shp[1] - shp[2],
                        shp[0] + shp[2], shp[1] + shp[2])
      cv2.circle(bg, (shp[0], shp[1]), shp[2], getColor(), -1)
      label_text = 'circle'
    elif label == 2:
      shp = getRectShape(bg)
      x1, y1, x2, y2 = shp[0], shp[1], shp[2], shp[3]
      cv2.rectangle(bg, (shp[0], shp[1]), (shp[2], shp[3]), getColor(), -1)
      label_text = 'rect'
    xmins.append(float(x1 / width))
    ymins.append(float(y1 / height))
    xmaxs.append(float(x2 / width))
    ymaxs.append(float(y2 / height))
    classes.append(label_map_dict[label_text])
    classes_text.append(label_text.encode('utf8'))
    truncated.append(0)
    poses.append("0".encode('utf8'))
    difficult_obj.append(0)

  # Write the image to a temporary JPEG, read the encoded bytes back, then
  # delete the file. The record keeps the logical name without the suffix.
  tmp_filename = filename + '_temp.jpg'
  img_path = os.path.join('./data', tmp_filename)
  cv2.imwrite(img_path, bg)
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses)
  }
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  os.remove(img_path)
  return example
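# Sketch of driving the synthetic-shape generator above into a TFRecord.
# The writer path, image size, and counts are illustrative;
# getCircleShape/getRectShape/getColor are assumed to be defined alongside
# make_shape.
import tensorflow as tf

def write_synthetic_record(output_path, label_map_dict, num_images=100):
  writer = tf.python_io.TFRecordWriter(output_path)
  for i in range(num_images):
    example = make_shape(max_num=3, width=300, height=300,
                         filename='synth_%05d' % i,
                         label_map_dict=label_map_dict)
    writer.write(example.SerializeToString())
  writer.close()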
def dict_to_tf_example(data, label_map_dict, image_dir):
  height = int(data['size']['height'])  # Image height
  width = int(data['size']['width'])  # Image width
  # Annotations may reference a .png name even though the stored file is .jpg.
  s = data['filename']
  if s.endswith('png'):
    s = s[:-3] + 'jpg'
  filename = os.path.join(image_dir, s)  # Filename of the image

  with tf.gfile.GFile(filename, 'rb') as fid:
    encoded_image_data = fid.read()
  encoded_image_data_io = io.BytesIO(encoded_image_data)
  image = PIL.Image.open(encoded_image_data_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  image_format = b'jpeg'  # b'jpeg' or b'png'

  xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
  xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
  ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
  ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
  classes_text = []  # List of string class name of bounding box (1 per box)
  classes = []  # List of integer class id of bounding box (1 per box)

  if 'object' in data:
    for obj in data['object']:
      if obj:
        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])
        xmins.append(xmin / width)
        ymins.append(ymin / height)
        xmaxs.append(xmax / width)
        ymaxs.append(ymax / height)
        class_name = obj['name']
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  return tf_example
def json_to_tf_example(json_data, dataset_directory, label_map_dict):
  """Convert JSON derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    json_data: dict holding the fields for a single image and its bounding
      boxes ('filename' plus a 'bndboxes' list with x, y, w, h, id entries)
    dataset_directory: Path to root directory holding the dataset
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by json_data['filename'] is not a
      valid JPEG
  """
  filename = json_data.get("filename")
  full_path = os.path.join(FLAGS.data_dir, "photos", filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()
  # Use the decoded image size rather than the JSON image_width/image_height
  # metadata, which may be stale if the photo was rescaled on disk.
  width, height = image.size

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  for obj in json_data.get("bndboxes"):
    difficult_obj.append(0)
    xmin.append(float(obj.get("x")) / width)
    ymin.append(float(obj.get("y")) / height)
    xmax.append(numpy.clip(float(obj.get("x") + obj.get("w")) / width, 0, 1))
    ymax.append(numpy.clip(float(obj.get("y") + obj.get("h")) / height, 0, 1))
    classes_text.append(obj.get("id").encode('utf8'))
    classes.append(label_map_dict[obj.get("id")])
    truncated.append(0)
    poses.append("Unspecified".encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return example
def dict_to_tf_example(example):
  filename = example['filename']
  filename = filename.encode()
  with tf.gfile.GFile(example['filename'], 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  width, height = image.size
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmins = []  # normalized left x-coordinate
  ymins = []  # normalized top y-coordinate
  xmaxs = []  # normalized right x-coordinate
  ymaxs = []  # normalized bottom y-coordinate
  classes = []  # class id
  classes_text = []  # class name
  for box in example['annotations']:
    xmins.append(float(box['xmin'] / width))
    xmaxs.append(float((box['xmin'] + box['x_width']) / width))
    ymins.append(float(box['ymin'] / height))
    ymaxs.append(float((box['ymin'] + box['y_height']) / height))
    classes_text.append(box['class'].encode())
    classes.append(int(LABELS_MAP[box['class']]))

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes)
  }))
  return tf_example
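# Illustrative input for the converter above, inferred from its field
# accesses ('filename' plus 'annotations' entries with xmin/ymin/x_width/
# y_height/class). The path and class name are hypothetical, and LABELS_MAP
# is assumed to contain the 'car' key; the call is left commented out since
# it would require the JPEG to exist on disk.
sample = {
    'filename': 'frames/000123.jpg',
    'annotations': [
        {'class': 'car', 'xmin': 12.0, 'ymin': 40.0,
         'x_width': 60.0, 'y_height': 30.0},
    ],
}
# tf_example = dict_to_tf_example(sample)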
def prepare_example(image_path, annotations, label_map_dict): """Converts a dictionary with annotations for an image to tf.Example proto. Args: image_path: The complete path to image. annotations: A dictionary representing the annotation of a single object that appears in the image. label_map_dict: A map from string label names to integer ids. Returns: example: The converted tf.Example. """ with tf.gfile.GFile(image_path, 'rb') as fid: encoded_png = fid.read() encoded_png_io = io.BytesIO(encoded_png) image = pil.open(encoded_png_io) image = np.asarray(image) key = hashlib.sha256(encoded_png).hexdigest() width = int(image.shape[1]) height = int(image.shape[0]) xmin_norm = annotations['2d_bbox_left'] / float(width) ymin_norm = annotations['2d_bbox_top'] / float(height) xmax_norm = annotations['2d_bbox_right'] / float(width) ymax_norm = annotations['2d_bbox_bottom'] / float(height) difficult_obj = [0]*len(xmin_norm) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_png), 'image/format': dataset_util.bytes_feature('png'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm), 'image/object/class/text': dataset_util.bytes_list_feature( [x.encode('utf8') for x in annotations['type']]), 'image/object/class/label': dataset_util.int64_list_feature( [label_map_dict[x] for x in annotations['type']]), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.float_list_feature( annotations['truncated']), 'image/object/alpha': dataset_util.float_list_feature( annotations['alpha']), 'image/object/3d_bbox/height': dataset_util.float_list_feature( annotations['3d_bbox_height']), 'image/object/3d_bbox/width': dataset_util.float_list_feature( annotations['3d_bbox_width']), 'image/object/3d_bbox/length': dataset_util.float_list_feature( annotations['3d_bbox_length']), 'image/object/3d_bbox/x': dataset_util.float_list_feature( annotations['3d_bbox_x']), 'image/object/3d_bbox/y': dataset_util.float_list_feature( annotations['3d_bbox_y']), 'image/object/3d_bbox/z': dataset_util.float_list_feature( annotations['3d_bbox_z']), 'image/object/3d_bbox/rot_y': dataset_util.float_list_feature( annotations['3d_bbox_rot_y']), })) return example
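# Sketch of building the `annotations` dict consumed by prepare_example
# from KITTI label lines; the field order follows the public KITTI devkit
# description (type truncated occluded alpha x1 y1 x2 y2 h w l x y z rot_y).
# This helper is illustrative; the original driver code is not shown here.
# Values are returned as numpy arrays so the elementwise division in
# prepare_example works.
import numpy as np

def parse_kitti_labels(label_path):
  anno = {k: [] for k in (
      'type', 'truncated', 'alpha',
      '2d_bbox_left', '2d_bbox_top', '2d_bbox_right', '2d_bbox_bottom',
      '3d_bbox_height', '3d_bbox_width', '3d_bbox_length',
      '3d_bbox_x', '3d_bbox_y', '3d_bbox_z', '3d_bbox_rot_y')}
  with open(label_path) as fid:
    for line in fid:
      f = line.split()
      anno['type'].append(f[0].lower())
      anno['truncated'].append(float(f[1]))
      anno['alpha'].append(float(f[3]))
      anno['2d_bbox_left'].append(float(f[4]))
      anno['2d_bbox_top'].append(float(f[5]))
      anno['2d_bbox_right'].append(float(f[6]))
      anno['2d_bbox_bottom'].append(float(f[7]))
      anno['3d_bbox_height'].append(float(f[8]))
      anno['3d_bbox_width'].append(float(f[9]))
      anno['3d_bbox_length'].append(float(f[10]))
      anno['3d_bbox_x'].append(float(f[11]))
      anno['3d_bbox_y'].append(float(f[12]))
      anno['3d_bbox_z'].append(float(f[13]))
      anno['3d_bbox_rot_y'].append(float(f[14]))
  return {k: (np.asarray(v) if k != 'type' else v) for k, v in anno.items()}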
def generate_tfrecord(group, path, label_dict):
  # Open image file
  with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)),
                         'rb') as gf:
    encoded_jpg = gf.read()
  jpg_io = io.BytesIO(encoded_jpg)
  img = Image.open(jpg_io)
  image_width, image_height = img.size
  filename = group.filename.encode('utf8')
  img_format = b'jpg'

  # For each box we store the normalized minimum and maximum x and y
  # positions of the bounding box.
  bx_min = []
  bx_max = []
  by_min = []
  by_max = []
  classes = []
  classes_texts = []
  # Iterate over every annotation row for this image.
  for i, row in group.object.iterrows():
    bx_min.append(row['xmin'] / image_width)
    bx_max.append(row['xmax'] / image_width)
    by_min.append(row['ymin'] / image_height)
    by_max.append(row['ymax'] / image_height)
    # Add the class text and its converted integer label.
    classes_texts.append(row['class'].encode('utf8'))
    classes.append(label_to_int(row['class'], label_dict))

  # TF record example (standard TF Object Detection API feature keys).
  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature(img_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(bx_min),
      'image/object/bbox/xmax': dataset_util.float_list_feature(bx_max),
      'image/object/bbox/ymin': dataset_util.float_list_feature(by_min),
      'image/object/bbox/ymax': dataset_util.float_list_feature(by_max),
      'image/object/class/text': dataset_util.bytes_list_feature(
          classes_texts),
      'image/object/class/label': dataset_util.int64_list_feature(classes)
  }))
  return tf_example
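# The `group` argument above is typically produced by splitting a CSV of
# annotations by filename; a common sketch of that step (column names are
# assumed from the row[...] accesses in the function, and the namedtuple
# fields match the group.filename / group.object accesses):
import collections
import pandas as pd

def split(df, group_key):
  data = collections.namedtuple('data', ['filename', 'object'])
  grouped = df.groupby(group_key)
  return [data(filename, grouped.get_group(filename))
          for filename in grouped.groups]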
def create_tf_record(output_filename, num_shards, examples): with contextlib2.ExitStack() as tf_record_close_stack: output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords( tf_record_close_stack, output_filename, num_shards) for idx, example in enumerate(examples): img_path = os.path.join(read_bucket, example) if not os.path.isfile(img_path): continue with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width, height = image.size xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] # 'coke', 'pepsi', 'coke'... classes = [] # 1, 2, 1... difficult_obj = [] truncated = [] poses = [] for annotation in annotations[example]: xmins.append(annotation['x']) xmaxs.append(annotation['x2']) ymins.append(annotation['y']) ymaxs.append(annotation['y2']) classes_text.append(annotation['label'].encode('utf8')) classes.append(1) # temporary, I need to assign labels to actual ids difficult_obj.append(0) truncated.append(0) poses.append(''.encode('utf8')) try: feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(example.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(example.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses) } tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) if tf_example: shard_idx = idx % num_shards output_tfrecords[shard_idx].write(tf_example.SerializeToString()) except ValueError: print('Invalid example, ignoring.')
def create_tf_example(example): # Bosch #height = 720 # Image height #width = 1280 # Image width # Udacity data set height = 600 # Image height width = 800 # Image width filename = example[ 'filename'] # Filename of the image. Empty if image is not from file filename = filename.encode() with tf.gfile.GFile(example['filename'], 'rb') as fid: encoded_image = fid.read() image_format = 'jpg'.encode() xmins = [ ] # List of normalized left x coordinates in bounding box (1 per box) xmaxs = [] # List of normalized right x coordinates in bounding box # (1 per box) ymins = [ ] # List of normalized top y coordinates in bounding box (1 per box) ymaxs = [] # List of normalized bottom y coordinates in bounding box # (1 per box) classes_text = [] # List of string class name of bounding box (1 per box) classes = [] # List of integer class id of bounding box (1 per box) for box in example['annotations']: #if box['occluded'] is False: #print("adding box") xmins.append(float(box['xmin'] / width)) xmaxs.append(float((box['xmin'] + box['x_width']) / width)) ymins.append(float(box['ymin'] / height)) ymaxs.append(float((box['ymin'] + box['y_height']) / height)) classes_text.append(box['class'].encode()) classes.append(int(LABEL_DICT[box['class']])) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_image), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
def dict_to_tf_example(data, label_map_dict, image_subdirectory,
                       ignore_difficult_instances=False):
    """Convert a flat annotation record to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
        data: data corresponding to each image file.
        label_map_dict: A map from string label names to integer ids.
        image_subdirectory: String specifying subdirectory within the dataset
            directory holding the actual image data.
        ignore_difficult_instances: Whether to skip difficult instances in
            the dataset (default: False).

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image pointed to by the annotation record is not
            a valid PNG.
    """
    img_path = image_subdirectory
    # Read in binary mode; the raw bytes are hashed and embedded below.
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = PIL.Image.open(encoded_png_io)
    if image.format != 'PNG':
        raise ValueError('Image format not PNG')
    key = hashlib.sha256(encoded_png).hexdigest()
    width, height = image.size
    img_filename = img_path.split('/')[-1]

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    occlud = []

    # data is a flat annotation record: index 1 is the class name, indices
    # 2-5 are pixel coordinates, index 6 is 'truncated,occluded'.
    xmin.append(int(data[2]) / width)
    ymin.append(int(data[3]) / height)
    xmax.append(int(data[4]) / width)
    ymax.append(int(data[5]) / height)
    class_name = data[1]
    classes_text.append(class_name.encode('utf8'))
    classes.append(label_map_dict[class_name])
    trun, occ = data[6].split(',')
    truncated.append(int(trun))
    occlud.append(int(occ))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(img_filename.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(img_filename.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        # Occlusion flags are stored as ints here, unlike the usual
        # bytes-valued 'image/object/view' field.
        'image/object/view': dataset_util.int64_list_feature(occlud),
    }))
    return example
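For context, a sketch of how the flat `data` record above might be produced. The one-object-per-line layout is an assumption inferred from the indices the function reads, and the filename, line content, and label map are hypothetical:

# Assumed annotation line layout, inferred from the indices used above:
#   <filename> <class> <xmin> <ymin> <xmax> <ymax> <truncated>,<occluded>
line = 'img_0001.png car 10 20 110 220 0,1'
data = line.split(' ')

example = dict_to_tf_example(data, label_map_dict, 'images/img_0001.png')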
def dict_to_tf_example(data, label_map_dict, img_path):
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()
    width = image.width
    height = image.height

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []

    for shape in data['Layers']['Layer']['Shapes']['Shape']:
        # Only shapes whose block text starts with 'Panel' are panel boxes.
        text = shape['BlockText']['Text'].text
        if not (text.startswith('Panel') or text.startswith('panel')):
            continue
        attrib = shape['Data']['Extent'].attrib
        x = float(attrib['X'])
        y = float(attrib['Y'])
        w = float(attrib['Width'])
        h = float(attrib['Height'])
        xmin = x
        xmax = x + w
        ymin = y
        ymax = y + h
        # Normalize pixel coordinates to [0, 1].
        xmin /= width
        ymin /= height
        xmax /= width
        ymax /= height
        if xmin < 0 or ymin < 0 or xmax > 1.01 or ymax > 1.01:
            # Flag images whose boxes fall outside the image bounds
            # (the 1% slack absorbs rounding error).
            print(img_path)
        xmins.append(xmin)
        ymins.append(ymin)
        xmaxs.append(xmax)
        ymaxs.append(ymax)
        class_name = 'Panel'
        classes_text.append(class_name.encode('utf8'))
        classes.append(label_map_dict[class_name])

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(img_path.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
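The converter above only prints the offending path when a box spills past the image border. If slightly out-of-range boxes should be kept rather than merely flagged, one option is to clamp them before appending; this helper is a sketch of that alternative, not part of the original converter:

def clamp_box(xmin, ymin, xmax, ymax):
    # Clamp normalized coordinates into [0.0, 1.0] so downstream
    # consumers never see out-of-range boxes.
    clamp = lambda v: min(max(v, 0.0), 1.0)
    return clamp(xmin), clamp(ymin), clamp(xmax), clamp(ymax)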