def dict_to_tf_example(data, label_map_dict, data_dir):
  """Converts a water-mask file name and its matching image to a tf.Example proto.

  `data` is the mask path relative to `data_dir`; the corresponding JPEG lives in
  an "images" directory mirroring the "mask" directory. No bounding boxes are
  emitted here; only the PNG-encoded instance mask is stored.
  """
  img_path = os.path.join(data_dir, data.replace("mask", "images"))
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  # The dataset images are a fixed 512x512.
  width = 512
  height = 512

  classes = []
  classes_text = []
  encoded_mask_png_list = []

  # Read the grayscale mask, binarize it to {0, 1}, and re-encode it as PNG bytes.
  mask_np = cv2.imread(os.path.join(data_dir, data), 0) // 255
  mask_img = PIL.Image.fromarray(mask_np)
  output = io.BytesIO()
  mask_img.save(output, format='PNG')
  encoded_mask_png_list.append(output.getvalue())

  class_name = 'water'
  classes_text.append(class_name.encode('utf8'))
  classes.append(label_map_dict[class_name])

  # `data` is the mask path string, so it serves as both filename and source_id.
  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(data.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(data.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }
  feature_dict['image/object/mask'] = (
      dataset_util.bytes_list_feature(encoded_mask_png_list))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
def create_tf_example(row): full_path = os.path.join(os.getcwd(), 'images', '{}'.format(row['filename'])) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = Image.open(encoded_jpg_io) width, height = image.size filename = row['filename'].encode('utf8') image_format = b'jpg' xmins = [row['xmin'] / width] xmaxs = [row['xmax'] / width] ymins = [row['ymin'] / height] ymaxs = [row['ymax'] / height] classes_text = [row['class'].encode('utf8')] classes = [class_text_to_int(row['class'])] tf_example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
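# Usage sketch (not from the original source): a minimal driver for the per-row
# converter above, assuming the annotations live in a CSV with filename/xmin/ymin/
# xmax/ymax/class columns and that class_text_to_int is defined elsewhere.
# The CSV and output paths are placeholders.
def generate_tfrecord_from_csv(csv_path='annotations/train_labels.csv',
                               output_path='train.record'):
    import pandas as pd  # assumed available
    examples = pd.read_csv(csv_path)
    writer = tf.python_io.TFRecordWriter(output_path)
    for _, row in examples.iterrows():
        tf_example = create_tf_example(row)
        writer.write(tf_example.SerializeToString())
    writer.close()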
def create_tf_example(example, writer):
    height = example['height']
    width = example['width']
    filename = example['filename']
    encoded_image_data = example['encoded_image_data']
    image_format = example['image_format']
    bboxes = example['bbox']

    xmins = [bbox[0] / float(width) for bbox in bboxes]   # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [bbox[2] / float(width) for bbox in bboxes]   # List of normalized right x coordinates in bounding box
    ymins = [bbox[1] / float(height) for bbox in bboxes]  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [bbox[3] / float(height) for bbox in bboxes]  # List of normalized bottom y coordinates in bounding box
    classes_text = example['class_text']
    classes = example['class_idx']

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
def testDecodePngInstanceMasks(self): image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8) mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8) encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png') decoded_png_1 = np.squeeze(mask_1.astype(np.float32)) encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png') decoded_png_2 = np.squeeze(mask_2.astype(np.float32)) encoded_masks = [encoded_png_1, encoded_png_2] decoded_masks = np.stack([decoded_png_1, decoded_png_2]) example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/object/mask': dataset_util.bytes_list_feature(encoded_masks) })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder( load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: tensor_dict = sess.run(tensor_dict) self.assertAllEqual( decoded_masks, tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
def testDecodeEmptyPngInstanceMasks(self): image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) encoded_masks = [] example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/object/mask': dataset_util.bytes_list_feature(encoded_masks), 'image/height': dataset_util.int64_feature(10), 'image/width': dataset_util.int64_feature(10), })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder( load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: tensor_dict = sess.run(tensor_dict) self.assertAllEqual( tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape, [0, 10, 10])
def testDecodeAdditionalChannels(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) additional_channel_tensor = np.random.randint( 256, size=(4, 5, 1)).astype(np.uint8) encoded_additional_channel = self._EncodeImage(additional_channel_tensor) decoded_additional_channel = self._DecodeImage(encoded_additional_channel) example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/additional_channels/encoded': dataset_util.bytes_list_feature( [encoded_additional_channel] * 2), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/source_id': dataset_util.bytes_feature('image_id'), })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder( num_additional_channels=2) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: tensor_dict = sess.run(tensor_dict) self.assertAllEqual( np.concatenate([decoded_additional_channel] * 2, axis=2), tensor_dict[fields.InputDataFields.image_additional_channels])
def testDecodeImageLabels(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/class/label': dataset_util.int64_list_feature([1, 2]), })).SerializeToString() example_decoder = tf_example_decoder.TfExampleDecoder() tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: tensor_dict = sess.run(tensor_dict) self.assertTrue( fields.InputDataFields.groundtruth_image_classes in tensor_dict) self.assertAllEqual( tensor_dict[fields.InputDataFields.groundtruth_image_classes], np.array([1, 2])) example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/class/text': dataset_util.bytes_list_feature(['dog', 'cat']), })).SerializeToString() label_map_string = """ item { id:3 name:'cat' } item { id:1 name:'dog' } """ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') with tf.gfile.Open(label_map_path, 'wb') as f: f.write(label_map_string) example_decoder = tf_example_decoder.TfExampleDecoder( label_map_proto_file=label_map_path) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: sess.run(tf.tables_initializer()) tensor_dict = sess.run(tensor_dict) self.assertTrue( fields.InputDataFields.groundtruth_image_classes in tensor_dict) self.assertAllEqual( tensor_dict[fields.InputDataFields.groundtruth_image_classes], np.array([1, 3]))
def createTfExample(singleFileData, path):
    # use TensorFlow's GFile function to open the .jpg image matching the current box data
    with tf.gfile.GFile(os.path.join(path, '{}'.format(singleFileData.filename)), 'rb') as tensorFlowImageFile:
        tensorFlowImage = tensorFlowImageFile.read()
    # end with

    # get the image width and height via converting from a TensorFlow image to an io library BytesIO image,
    # then to a PIL Image, then breaking out the width and height
    bytesIoImage = io.BytesIO(tensorFlowImage)
    pilImage = Image.open(bytesIoImage)
    width, height = pilImage.size

    # get the file name from the file data passed in, and set the image format to .jpg
    fileName = singleFileData.filename.encode('utf8')
    imageFormat = b'jpg'

    # declare empty lists for the box x, y, mins and maxes, and the class as text and as an integer
    xMins = []
    xMaxs = []
    yMins = []
    yMaxs = []
    classesAsText = []
    classesAsInts = []

    # for each row in the current .xml file's data . . . (each row in the .xml file corresponds to one box)
    for index, row in singleFileData.object.iterrows():
        xMins.append(row['xmin'] / width)
        xMaxs.append(row['xmax'] / width)
        yMins.append(row['ymin'] / height)
        yMaxs.append(row['ymax'] / height)
        classesAsText.append(row['class'].encode('utf8'))
        classesAsInts.append(classAsTextToClassAsInt(row['class']))
    # end for

    # finally we can calculate and return the TensorFlow Example
    tfExample = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fileName),
        'image/source_id': dataset_util.bytes_feature(fileName),
        'image/encoded': dataset_util.bytes_feature(tensorFlowImage),
        'image/format': dataset_util.bytes_feature(imageFormat),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xMins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xMaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(yMins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(yMaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classesAsText),
        'image/object/class/label': dataset_util.int64_list_feature(classesAsInts)}))
    return tfExample
def create_tf_example(filename, writer):
    lines = open(filename).readlines()
    image_filename = lines[0].strip()[1:]
    classes_text = []
    classes = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []

    im = Image.open(image_filename)
    arr = io.BytesIO()
    im.save(arr, format='PNG')
    height = im.height
    width = im.width
    encoded_image_data = arr.getvalue()
    image_format = 'png'

    for line in lines[1:]:
        line = line.strip()
        if line == '':
            continue
        data = line.split(",")
        bbox = list(map(int, map(float, data[:4])))
        class_text = data[4].strip()
        class_idx = labels.index(class_text)
        classes_text.append(class_text)
        classes.append(class_idx)
        xmins.append(bbox[0] / float(width))
        xmaxs.append(bbox[2] / float(width))   # List of normalized right x coordinates in bounding box
        ymins.append(bbox[1] / float(height))  # List of normalized top y coordinates in bounding box (1 per box)
        ymaxs.append(bbox[3] / float(height))  # List of normalized bottom y coordinates in bounding box

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/source_id': dataset_util.bytes_feature(bytes(filename, "utf-8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(bytes(image_format, "utf-8")),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [bytes(t, "utf-8") for t in classes_text]),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    writer.write(tf_example.SerializeToString())
def testDecodeObjectLabelWithText(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) bbox_classes_text = ['cat', 'dog'] # Annotation label gets overridden by labelmap id. annotated_bbox_classes = [3, 4] expected_bbox_classes = [1, 2] example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/object/class/text': dataset_util.bytes_list_feature(bbox_classes_text), 'image/object/class/label': dataset_util.int64_list_feature(annotated_bbox_classes), })).SerializeToString() label_map_string = """ item { id:1 name:'cat' } item { id:2 name:'dog' } """ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') with tf.gfile.Open(label_map_path, 'wb') as f: f.write(label_map_string) example_decoder = tf_example_decoder.TfExampleDecoder( label_map_proto_file=label_map_path) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) init = tf.tables_initializer() with self.test_session() as sess: sess.run(init) tensor_dict = sess.run(tensor_dict) self.assertAllEqual(expected_bbox_classes, tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg = self._EncodeImage(image_tensor) bbox_classes_text = ['cat', 'cheetah'] bbox_classes_id = [5, 6] example = tf.train.Example( features=tf.train.Features( feature={ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg), 'image/format': dataset_util.bytes_feature('jpeg'), 'image/object/class/text': dataset_util.bytes_list_feature(bbox_classes_text), 'image/object/class/label': dataset_util.int64_list_feature(bbox_classes_id), })).SerializeToString() label_map_string = """ item { name:'/m/cat' id:3 display_name:'cat' } item { name:'/m/dog' id:1 display_name:'dog' } """ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt') with tf.gfile.Open(label_map_path, 'wb') as f: f.write(label_map_string) example_decoder = tf_example_decoder.TfExampleDecoder( label_map_proto_file=label_map_path) tensor_dict = example_decoder.decode(tf.convert_to_tensor(example)) with self.test_session() as sess: sess.run(tf.tables_initializer()) tensor_dict = sess.run(tensor_dict) self.assertAllEqual([3, -1], tensor_dict[fields.InputDataFields.groundtruth_classes])
def create_tf_example(group, path): with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = Image.open(encoded_jpg_io) width, height = image.size filename = group.filename.encode('utf8') image_format = b'jpg' xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] for index, row in group.object.iterrows(): xmins.append(row['xmin'] / width) xmaxs.append(row['xmax'] / width) ymins.append(row['ymin'] / height) ymaxs.append(row['ymax'] / height) classes_text.append(row['class'].encode('utf8')) classes.append(class_text_to_int(row['class'])) tf_example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example
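# Driver sketch (assumption, not from the original source): the grouped variant
# above is typically paired with a namedtuple-based split() helper that bundles
# all CSV rows sharing a filename. Column names and paths below are placeholders.
def split(df, group):
    from collections import namedtuple
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x))
            for filename, x in zip(gb.groups.keys(), gb.groups)]

def generate_tfrecord_from_grouped_csv(csv_path='train_labels.csv',
                                       image_dir='images',
                                       output_path='train.record'):
    import pandas as pd  # assumed available
    writer = tf.python_io.TFRecordWriter(output_path)
    for group in split(pd.read_csv(csv_path), 'filename'):
        tf_example = create_tf_example(group, os.path.join(os.getcwd(), image_dir))
        writer.write(tf_example.SerializeToString())
    writer.close()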
def dict_to_tf_example(data, image_subdirectory): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] if 'object' in data: for obj in data['object']: xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) class_name = obj['name'] print("%s: Added class %s (%d)" % (data['filename'], class_name, class_text_to_int(class_name))) classes_text.append(class_name.encode('utf8')) classes.append(class_text_to_int(class_name)) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), } example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example
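# Sketch (assumption): the PASCAL-style converter above expects the nested dict
# produced by dataset_util.recursive_parse_xml_to_dict. One way to build it from
# an annotation file; the XML path is a placeholder.
def xml_file_to_example(xml_path, image_subdirectory):
    from lxml import etree  # assumed available
    with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    return dict_to_tf_example(data, image_subdirectory)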
def dict_to_tf_example(labels_image, label_data, params, label_map_dict, image_dir, image_prefix): fn_dec_rate = os.path.join(image_dir, image_prefix + '_' + 'decay_rate' + '.png') img_dec_rate = cv2.imread(fn_dec_rate, 0) fn_int = os.path.join(image_dir, image_prefix + '_' + 'intensity' + '.png') img_int = cv2.imread(fn_int, 0) fn_zmax = os.path.join(image_dir, image_prefix + '_' + 'zmax' + '.png') img_zmax = cv2.imread(fn_zmax, 0) img = np.stack([img_dec_rate, img_int, img_zmax], axis=-1) img_output = '/home/fischer/U/kitti/record/hack/img/test.png' cv2.imwrite(img_output, img) encoded_png, key = _readImage(img_output) width = int(params['batch_processor']['width'] / params['batch_processor']['resolution']) height = int(params['batch_processor']['length'] / params['batch_processor']['resolution']) xmin = [] ymin = [] xmax = [] ymax = [] x_c = [] y_c = [] w = [] h = [] angle = [] sin_angle = [] cos_angle = [] classes = [] classes_text = [] for idx, label_img in enumerate(labels_image): xmin.append(int(min(label_img[0])) / width) ymin.append(int(min(label_img[1])) / height) xmax.append(int(max(label_img[0])) / width) ymax.append(int(max(label_img[1])) / height) x_min = int(min(label_img[0])) / width y_min = int(min(label_img[1])) / height x_max = int(max(label_img[0])) / width y_max = int(max(label_img[1])) / height if (x_min >= 1) or (y_min >= 1) or (x_max >= 1) or (y_max >= 1): print('Higher:', x_min, y_min, x_max, y_max) if (x_min <= 0) or (y_min <= 0) or (x_max <= 0) or (y_max <= 0): print('Lower:', x_min, y_min, x_max, y_max) x_c.append( (int(min(label_img[0])) + int(max(label_img[0]))) / (2 * width)) y_c.append( (int(min(label_img[1])) + int(max(label_img[1]))) / (2 * height)) angle_rad = _flipAngle(label_data[idx].ry) angle.append(angle_rad) sin_angle.append(math.sin(2 * angle_rad)) cos_angle.append(math.cos(2 * angle_rad)) vec_s_x = math.cos(angle_rad) vec_s_y = math.sin(angle_rad) w_p = label_data[idx].w / params['batch_processor']['resolution'] w_p_s = w_p * math.sqrt(vec_s_x * vec_s_x / (height * height) + vec_s_y * vec_s_y / (width * width)) w.append(w_p_s) l_p = label_data[idx].l / params['batch_processor']['resolution'] l_p_s = l_p * math.sqrt(vec_s_x * vec_s_x / (width * width) + vec_s_y * vec_s_y / (height * height)) h.append(l_p_s) class_name = label_data[idx].type classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) return tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(image_prefix.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(image_prefix.encode('utf8')), 'image/format': dataset_util.bytes_feature('png'.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_png), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'boxes/inclined/x_c': dataset_util.float_list_feature(x_c), 'boxes/inclined/y_c': dataset_util.float_list_feature(y_c), 'boxes/inclined/w': dataset_util.float_list_feature(w), 'boxes/inclined/h': dataset_util.float_list_feature(h), 'boxes/inclined/angle': dataset_util.float_list_feature(angle), 'boxes/inclined/sin_angle': dataset_util.float_list_feature(sin_angle), 
'boxes/inclined/cos_angle': dataset_util.float_list_feature(cos_angle), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), }))
def xml_to_tf(path_input, path_output): xml_list = [] column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'] writer = tf.compat.v1.python_io.TFRecordWriter(path_output) files = os.listdir(path_input) for file in files: if file.endswith(".xml"): xmlFile = path_input + file tree = ET.parse(xmlFile) root = tree.getroot() filename = root[1].text width = int(root[4][0].text) height = int(root[4][1].text) xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] for member in root.findall('object'): beer = member[0].text xmin = int(member[4][0].text) ymin = int(member[4][1].text) xmax = int(member[4][2].text) ymax = int(member[4][3].text) xmins.append(xmin/width) xmaxs.append(xmax/width) ymins.append(ymin/height) ymaxs.append(ymax/height) classes_text.append(beer.encode('utf8')) classes.append(class_text_to_int(beer)) with tf.io.gfile.GFile(os.path.join(path_input, '{}'.format(filename)), 'rb') as fid: encoded_jpg = fid.read() tf_example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(IMAGE_FORMAT), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) writer.write(tf_example.SerializeToString()) writer.close() output_path = os.path.join(os.getcwd(), path_output) print('Successfully created the TFRecords: {}'.format(output_path))
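# Assumed helpers (not shown in the original): xml_to_tf above references a
# module-level IMAGE_FORMAT constant and a class_text_to_int function. A minimal
# sketch with an illustrative label list; the real names and ids must match the
# label map used for training.
IMAGE_FORMAT = b'jpg'
LABELS = ['label_one', 'label_two']  # hypothetical class names

def class_text_to_int(label):
    # Label-map ids are 1-based in the TF Object Detection API.
    return LABELS.index(label) + 1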
def create_tf_example(group, path, class_dict): with tf.io.gfile.GFile(os.path.join(path, "{}".format(group.filename)), "rb") as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = Image.open(encoded_jpg_io) width, height = image.size filename = group.filename.encode("utf8") image_format = b"jpg" xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] for index, row in group.object.iterrows(): if set(["xmin_rel", "xmax_rel", "ymin_rel", "ymax_rel"]).issubset(set(row.index)): xmin = row["xmin_rel"] xmax = row["xmax_rel"] ymin = row["ymin_rel"] ymax = row["ymax_rel"] elif set(["xmin", "xmax", "ymin", "ymax"]).issubset(set(row.index)): xmin = row["xmin"] / width xmax = row["xmax"] / width ymin = row["ymin"] / height ymax = row["ymax"] / height xmins.append(xmin) xmaxs.append(xmax) ymins.append(ymin) ymaxs.append(ymax) classes_text.append(row["class"].encode("utf8")) classes.append(class_dict[row["class"]]) tf_example = tf.train.Example(features=tf.train.Features( feature={ "image/height": dataset_util.int64_feature(height), "image/width": dataset_util.int64_feature(width), "image/filename": dataset_util.bytes_feature(filename), "image/source_id": dataset_util.bytes_feature(filename), "image/encoded": dataset_util.bytes_feature(encoded_jpg), "image/format": dataset_util.bytes_feature(image_format), "image/object/bbox/xmin": dataset_util.float_list_feature(xmins), "image/object/bbox/xmax": dataset_util.float_list_feature(xmaxs), "image/object/bbox/ymin": dataset_util.float_list_feature(ymins), "image/object/bbox/ymax": dataset_util.float_list_feature(ymaxs), "image/object/class/text": dataset_util.bytes_list_feature(classes_text), "image/object/class/label": dataset_util.int64_list_feature(classes), })) return tf_example
def create_tf_example(img_path, json_path, label_map_dict, ignore_difficult_instances=False): with tf.gfile.GFile(json_path, 'r') as fid: json_str = fid.read() data = json.loads(json_str) #data = dataset_util.recursive_parse_xml_to_dict(xml) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width, height = image.size xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] masks = [] if len(data["shapes"]) > 1: print("problem img {}".format(data["imagePath"])) for obj in data['shapes']: difficult = False if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) for p in obj["points"]: if p[0] >= width: print("problem {} and img size {} and img {}".format( p, image.size, data["imagePath"])) p[0] = width - 1 if p[0] < 0: print("problem {} and img size {} and img {}".format( p, image.size, data["imagePath"])) p[0] = 0 if p[1] >= height: print("problem {} and img size {} and img {}".format( p, image.size, data["imagePath"])) p[1] = height - 1 if p[1] < 0: print("problem {} and img size {} and img {}".format( p, image.size, data["imagePath"])) p[1] = 0 s_xmin = min([p[0] for p in obj["points"]]) s_ymin = min([p[1] for p in obj["points"]]) s_xmax = max([p[0] for p in obj["points"]]) s_ymax = max([p[1] for p in obj["points"]]) xmin.append(max(float(s_xmin) / width, 0)) ymin.append(max(float(s_ymin) / height, 0)) xmax.append(min(float(s_xmax) / width, 1.0)) ymax.append(min(float(s_ymax) / height, 1.0)) classes_text.append(obj["label"].encode('utf8')) classes.append(label_map_dict[obj["label"]]) truncated.append(int(0)) poses.append("Unspecified".encode('utf8')) # create masks polygon = [tuple(p) for p in obj["points"]] img = Image.new('L', (width, height), 0) ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1) mask = numpy.array(img) masks.append(mask) #img.save("mask.png") feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(data['imagePath'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['imagePath'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } encoded_mask_png_list = [] for mask in masks: img = Image.fromarray(mask) output = io.BytesIO() img.save(output, format='PNG') encoded_mask_png_list.append(output.getvalue()) feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png_list)) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return example
def dict_to_tf_example(data, label_map_dict, image_subdirectory, ignore_difficult_instances=False): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) mask_path: String path to PNG encoded mask. label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). faces_only: If True, generates bounding boxes for pet faces. Otherwise generates bounding boxes (as well as segmentations for full pet bodies). Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] masks = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) class_name = get_class_name_from_filename(data['filename']) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example
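# Assumed helper (not shown in the original): the pet-style converter above
# derives the class from the image file name via get_class_name_from_filename.
# A sketch following the Oxford-IIIT Pet naming, e.g. 'Abyssinian_100.jpg'.
def get_class_name_from_filename(file_name):
    import re
    match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
    return match.groups()[0]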
def txt_to_tf_example(txt, img_path, label_map_dict):
  """Converts a YOLO-style annotation string to a tf.Example proto.

  Each non-empty line of `txt` is expected to be
  "<class_id> <x_center> <y_center> <width> <height>" with coordinates already
  normalized to [0, 1], so the box corners are computed directly from the
  center and size.

  Args:
    txt: Annotation file contents for a single image.
    img_path: Full path to the corresponding JPEG image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by img_path is not a valid JPEG.
  """
  _, filename = os.path.split(img_path)

  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image {} format not JPEG'.format(img_path))
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(image.size[0])
  height = int(image.size[1])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []

  for obj in txt.split('\n'):
    if obj == '':
      continue
    difficult_obj.append(0)
    obj_data = obj.split(' ')
    # Convert center/size to corner coordinates (values are already normalized).
    xmin.append(float(obj_data[1]) - float(obj_data[3]) / 2)
    ymin.append(float(obj_data[2]) - float(obj_data[4]) / 2)
    xmax.append(float(obj_data[1]) + float(obj_data[3]) / 2)
    ymax.append(float(obj_data[2]) + float(obj_data[4]) / 2)
    # Look up the class name whose id matches the YOLO class index.
    for name, idx in label_map_dict.items():
      if idx == int(obj_data[0]):
        classes_text.append(name.encode('utf8'))
    classes.append(int(obj_data[0]))
    truncated.append(0)
    poses.append('Frontal'.encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(
      feature={
          'image/height': dataset_util.int64_feature(height),
          'image/width': dataset_util.int64_feature(width),
          'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
          'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
          'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
          'image/encoded': dataset_util.bytes_feature(encoded_jpg),
          'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
          'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
          'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
          'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
          'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
          'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
          'image/object/class/label': dataset_util.int64_list_feature(classes),
          'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
          'image/object/truncated': dataset_util.int64_list_feature(truncated),
          'image/object/view': dataset_util.bytes_list_feature(poses),
      }))
  return example
def dict_to_tf_example(filename, mask_path, label_map_dict, img_path):
  """Converts an image and its PNG segmentation mask to a tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    filename: name of the image.
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integer ids.
    img_path: String path to the JPEG image.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by filename is not a valid JPEG.
  """
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  width = np.asarray(image).shape[1]
  height = np.asarray(image).shape[0]
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as fid:
    encoded_mask_png = fid.read()
  encoded_png_io = io.BytesIO(encoded_mask_png)
  mask = PIL.Image.open(encoded_png_io)
  mask_np = np.asarray(mask.convert('L'))
  if mask.format != 'PNG':
    raise ValueError('Mask format not PNG')

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  masks = []

  # Derive one box and one binary mask per class that actually appears in the mask.
  for k in list(mask_pixel.keys()):
    class_name = k
    nonbackground_indices_x = np.any(mask_np == mask_pixel[class_name], axis=0)
    nonbackground_indices_y = np.any(mask_np == mask_pixel[class_name], axis=1)
    nonzero_x_indices = np.where(nonbackground_indices_x)
    nonzero_y_indices = np.where(nonbackground_indices_y)

    if np.asarray(nonzero_x_indices).shape[1] > 0 and np.asarray(
        nonzero_y_indices).shape[1] > 0:
      xmin = float(np.min(nonzero_x_indices))
      xmax = float(np.max(nonzero_x_indices))
      ymin = float(np.min(nonzero_y_indices))
      ymax = float(np.max(nonzero_y_indices))
      print(filename, 'bounding box for', class_name, xmin, xmax, ymin, ymax)

      xmins.append(xmin / width)
      ymins.append(ymin / height)
      xmaxs.append(xmax / width)
      ymaxs.append(ymax / height)
      classes_text.append(class_name.encode('utf8'))
      classes.append(label_map_dict[class_name])
      mask_remapped = (mask_np == mask_pixel[class_name]).astype(np.uint8)
      masks.append(mask_remapped)

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }

  # PNG-encode each per-class binary mask.
  encoded_mask_png_list = []
  for mask in masks:
    img = PIL.Image.fromarray(mask)
    output = io.BytesIO()
    img.save(output, format='PNG')
    encoded_mask_png_list.append(output.getvalue())
  feature_dict['image/object/mask'] = (
      dataset_util.bytes_list_feature(encoded_mask_png_list))

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
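# Usage sketch (assumption): the mask-based converter above relies on a
# module-level mask_pixel dict mapping each class name to the grayscale value it
# occupies in the mask PNG. Class names, pixel values and paths below are
# placeholders.
mask_pixel = {'class_a': 75, 'class_b': 150}

def write_one_masked_example(writer, label_map_path):
    from object_detection.utils import label_map_util  # assumed available
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    example = dict_to_tf_example('img_0001.jpg',         # image file name
                                 'masks/img_0001.png',   # PNG mask path
                                 label_map_dict,
                                 'images/img_0001.jpg')  # full image path
    writer.write(example.SerializeToString())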
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False, keypoint_annotations_dict=None, densepose_annotations_dict=None, remove_non_person_annotations=False, remove_non_person_images=False): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. keypoint_annotations_dict: A dictionary that maps from annotation_id to a dictionary with keys: [u'keypoints', u'num_keypoints'] represeting the keypoint information for this person object annotation. If None, then no keypoint annotations will be populated. densepose_annotations_dict: A dictionary that maps from annotation_id to a dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V'] representing part surface coordinates. For more information see http://densepose.org/. remove_non_person_annotations: Whether to remove any annotations that are not the "person" class. remove_non_person_images: Whether to remove any images that do not contain at least one "person" annotation. Returns: key: SHA256 hash of the image. example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. num_keypoint_annotation_skipped: Number of keypoint annotations that were skipped. num_densepose_annotation_skipped: Number of DensePose annotations that were skipped. 
Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] keypoints_x = [] keypoints_y = [] keypoints_visibility = [] keypoints_name = [] num_keypoints = [] include_keypoint = keypoint_annotations_dict is not None num_annotations_skipped = 0 num_keypoint_annotation_used = 0 num_keypoint_annotation_skipped = 0 dp_part_index = [] dp_x = [] dp_y = [] dp_u = [] dp_v = [] dp_num_points = [] densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox'] include_densepose = densepose_annotations_dict is not None num_densepose_annotation_used = 0 num_densepose_annotation_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue category_id = int(object_annotations['category_id']) category_name = category_index[category_id]['name'].encode('utf8') if remove_non_person_annotations and category_name != b'person': num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_ids.append(category_id) category_names.append(category_name) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects( object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) if include_keypoint: annotation_id = object_annotations['id'] if annotation_id in keypoint_annotations_dict: num_keypoint_annotation_used += 1 keypoint_annotations = keypoint_annotations_dict[annotation_id] keypoints = keypoint_annotations['keypoints'] num_kpts = keypoint_annotations['num_keypoints'] keypoints_x_abs = keypoints[::3] keypoints_x.extend( [float(x_abs) / image_width for x_abs in keypoints_x_abs]) keypoints_y_abs = keypoints[1::3] keypoints_y.extend( [float(y_abs) / image_height for y_abs in keypoints_y_abs]) keypoints_visibility.extend(keypoints[2::3]) keypoints_name.extend(_COCO_KEYPOINT_NAMES) num_keypoints.append(num_kpts) else: keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES)) keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES)) keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES)) keypoints_name.extend(_COCO_KEYPOINT_NAMES) num_keypoints.append(0) if include_densepose: annotation_id = object_annotations['id'] if (annotation_id in densepose_annotations_dict and all(key in densepose_annotations_dict[annotation_id] for key in densepose_keys)): dp_annotations = densepose_annotations_dict[annotation_id] num_densepose_annotation_used += 1 dp_num_points.append(len(dp_annotations['dp_I'])) 
dp_part_index.extend([ int(i - _DP_PART_ID_OFFSET) for i in dp_annotations['dp_I'] ]) # DensePose surface coordinates are defined on a [256, 256] grid # relative to each instance box (i.e. absolute coordinates in range # [0., 256.]). The following converts the coordinates # so that they are expressed in normalized image coordinates. dp_x_box_rel = [ clip_to_unit(val / 256.) for val in dp_annotations['dp_x'] ] dp_x_norm = [(float(x) + x_box_rel * width) / image_width for x_box_rel in dp_x_box_rel] dp_y_box_rel = [ clip_to_unit(val / 256.) for val in dp_annotations['dp_y'] ] dp_y_norm = [(float(y) + y_box_rel * height) / image_height for y_box_rel in dp_y_box_rel] dp_x.extend(dp_x_norm) dp_y.extend(dp_y_norm) dp_u.extend(dp_annotations['dp_U']) dp_v.extend(dp_annotations['dp_V']) else: dp_num_points.append(0) if (remove_non_person_images and not any(name == b'person' for name in category_names)): return (key, None, num_annotations_skipped, num_keypoint_annotation_skipped, num_densepose_annotation_skipped) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(category_names), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) if include_keypoint: feature_dict['image/object/keypoint/x'] = ( dataset_util.float_list_feature(keypoints_x)) feature_dict['image/object/keypoint/y'] = ( dataset_util.float_list_feature(keypoints_y)) feature_dict['image/object/keypoint/num'] = ( dataset_util.int64_list_feature(num_keypoints)) feature_dict['image/object/keypoint/visibility'] = ( dataset_util.int64_list_feature(keypoints_visibility)) feature_dict['image/object/keypoint/text'] = ( dataset_util.bytes_list_feature(keypoints_name)) num_keypoint_annotation_skipped = (len(keypoint_annotations_dict) - num_keypoint_annotation_used) if include_densepose: feature_dict['image/object/densepose/num'] = ( dataset_util.int64_list_feature(dp_num_points)) feature_dict['image/object/densepose/part_index'] = ( dataset_util.int64_list_feature(dp_part_index)) feature_dict['image/object/densepose/x'] = ( dataset_util.float_list_feature(dp_x)) feature_dict['image/object/densepose/y'] = ( dataset_util.float_list_feature(dp_y)) feature_dict['image/object/densepose/u'] = ( dataset_util.float_list_feature(dp_u)) feature_dict['image/object/densepose/v'] = ( dataset_util.float_list_feature(dp_v)) num_densepose_annotation_skipped = (len(densepose_annotations_dict) - num_densepose_annotation_used) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return (key, example, num_annotations_skipped, num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
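# Driver sketch (assumption, not from the original source): how the COCO-style
# converter above is typically driven, given a COCO JSON with 'images',
# 'annotations' and 'categories' keys. File paths are placeholders.
def convert_coco_json(annotations_file, image_dir, output_path):
    import json
    from object_detection.utils import label_map_util  # assumed available
    with tf.gfile.GFile(annotations_file, 'r') as fid:
        groundtruth = json.load(fid)
    category_index = label_map_util.create_category_index(groundtruth['categories'])
    # Index annotations by image id so each image only sees its own boxes.
    annotations_index = {}
    for annotation in groundtruth['annotations']:
        annotations_index.setdefault(annotation['image_id'], []).append(annotation)
    writer = tf.python_io.TFRecordWriter(output_path)
    for image in groundtruth['images']:
        annotations_list = annotations_index.get(image['id'], [])
        _, example, _, _, _ = create_tf_example(
            image, annotations_list, image_dir, category_index)
        if example is not None:
            writer.write(example.SerializeToString())
    writer.close()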
def process(self, image_id): """Builds a tf.Example given an image id. Args: image_id: the image id of the associated image Returns: List of tf.Examples. """ image = self._image_dict[image_id] annotations = self._annotation_dict[image_id] image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] image_location_id = image['location'] image_datetime = str(image['date_captured']) image_sequence_id = str(image['seq_id']) image_sequence_num_frames = int(image['seq_num_frames']) image_sequence_frame_num = int(image['frame_num']) full_path = os.path.join(self._image_directory, filename) try: # Ensure the image exists and is not corrupted with tf.io.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) # Ensure the image can be read by tf with tf.Graph().as_default(): image = tf.image.decode_jpeg(encoded_jpg, channels=3) init_op = tf.initialize_all_tables() with tf.Session() as sess: sess.run(init_op) sess.run(image) except Exception as e: # pylint: disable=broad-except # The image file is missing or corrupt tf.logging.error(str(e)) return [] key = hashlib.sha256(encoded_jpg).hexdigest() feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/location': dataset_util.bytes_feature(str(image_location_id).encode('utf8')), 'image/seq_num_frames': dataset_util.int64_feature(image_sequence_num_frames), 'image/seq_frame_num': dataset_util.int64_feature(image_sequence_frame_num), 'image/seq_id': dataset_util.bytes_feature(image_sequence_id.encode('utf8')), 'image/date_captured': dataset_util.bytes_feature(image_datetime.encode('utf8')) } num_annotations_skipped = 0 if annotations: xmin = [] xmax = [] ymin = [] ymax = [] category_names = [] category_ids = [] area = [] for object_annotations in annotations: if 'bbox' in object_annotations and self._keep_bboxes: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) if 'area' in object_annotations: area.append(object_annotations['area']) else: # approximate area using l*w/2 area.append(width * height / 2.0) category_id = int(object_annotations['category_id']) category_ids.append(category_id) category_names.append( self._category_dict[category_id]['name'].encode('utf8')) feature_dict.update({ 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(category_names), 'image/object/class/label': dataset_util.int64_list_feature(category_ids), 'image/object/area': dataset_util.float_list_feature(area), }) # For classification, 
add the first category to image/class/label and # image/class/text if not category_ids: feature_dict.update({ 'image/class/label': dataset_util.int64_list_feature([0]), 'image/class/text': dataset_util.bytes_list_feature(['empty'.encode('utf8')]), }) else: feature_dict.update({ 'image/class/label': dataset_util.int64_list_feature([category_ids[0]]), 'image/class/text': dataset_util.bytes_list_feature([category_names[0]]), }) else: # Add empty class if there are no annotations feature_dict.update({ 'image/class/label': dataset_util.int64_list_feature([0]), 'image/class/text': dataset_util.bytes_list_feature(['empty'.encode('utf8')]), }) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) self._num_examples_processed.inc(1) return [(example)]
def dict_to_tf_example(data, image_subdirectory='JPEGImages'):
  """Convert an annotation dict to a tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding the fields for a single image and its single box
      (file, width, height, bbox, category_class, category_name, _id).
    image_subdirectory: String specifying the subdirectory within the dataset
      directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['file'] is not a valid JPEG.
  """
  full_path = os.path.join('/home/lion/dataset', data['file'])
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  poses = []
  classes = []
  classes_text = []

  width = int(data['width'])
  height = int(data['height'])

  xmin.append(float(data['bbox']['x1']) / width)
  xmax.append(float(data['bbox']['x2']) / width)
  if int(data['category_class']) == 2:  # exception for the bottom class
    ymin.append(float(data['bbox']['y1'] + 8) / height)
  else:
    ymin.append(float(data['bbox']['y1']) / height)
  if int(data['category_class']) == 1:  # exception for the top class
    ymax.append(float(data['bbox']['y2'] - 8) / height)
  else:
    ymax.append(float(data['bbox']['y2']) / height)

  classes_text.append(data['category_name'].encode('utf8'))
  if data['category_class'] == str(1):
    classes.append(int(data['category_class']))
  if data['category_class'] == str(3):
    classes.append(int(2))

  difficult = [0]
  truncated = [0]
  poses.append('Frontal'.encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(
      feature={
          'image/height': dataset_util.int64_feature(height),
          'image/width': dataset_util.int64_feature(width),
          'image/filename': dataset_util.bytes_feature(data['file'].encode('utf8')),
          'image/source_id': dataset_util.bytes_feature(str(data['_id']).encode('utf8')),
          'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
          'image/encoded': dataset_util.bytes_feature(encoded_jpg),
          'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
          'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
          'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
          'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
          'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
          'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
          'image/object/class/label': dataset_util.int64_list_feature(classes),
          'image/object/difficult': dataset_util.int64_list_feature(difficult),
          'image/object/truncated': dataset_util.int64_list_feature(truncated),
          'image/object/view': dataset_util.bytes_list_feature(poses),
      }))
  return example
def dict_to_tf_example(annotation, dataset_directory, label_map_dict): im_path = str(annotation['relative_im_path']) cls = int(annotation['class']) x1 = int(annotation['bbox_x1']) y1 = int(annotation['bbox_y1']) x2 = int(annotation['bbox_x2']) y2 = int(annotation['bbox_y2']) # read image full_img_path = os.path.join(dataset_directory, im_path) # read in the image and make a thumbnail of it max_size = 500, 500 big_image = PIL.Image.open(full_img_path) width, height = big_image.size big_image.thumbnail(max_size, PIL.Image.ANTIALIAS) full_thumbnail_path = os.path.splitext(full_img_path)[0] + '_thumbnail.jpg' big_image.save(full_thumbnail_path) with tf.gfile.GFile(full_thumbnail_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) xmin = [] xmax = [] ymin = [] ymax = [] # calculate box using original image coordinates xmin.append(max(0, x1 / width)) xmax.append(min(1.0, x2 / width)) ymin.append(max(0, y1 / height)) ymax.append(min(1.0, y2 / height)) # set width and height to thumbnail size for tfrecord ingest width, height = image.size classes = [] classes_text = [] label = '' for name, val in label_map_dict.items(): if val == cls: label = name break classes_text.append(label.encode('utf8')) classes.append(label_map_dict[label]) example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(full_img_path.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(full_img_path.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return example
def create_tf_example(image, annotations_list, image_dir, category_index, total_targets): filename = image['filename'] image_id = image['id'] full_path = os.path.join(image_dir, filename) image_array = Image.open(full_path) image_array = np.asarray(image_array) image_height = image_array.shape[0] image_width = image_array.shape[1] with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] category_names = [] category_ids = [] area = [] num_annotations_skipped = 0 for object_annotations in annotations_list: for target in object_annotations['targets']: total_targets += 1 cx = target['center'][0] cy = target['center'][1] x = target['ul'][0] y = target['ul'][1] width = (cx - x) * 2 height = (cy - y) * 2 if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) category_id = int(dataset._category_lookup(target['category'])) category_ids.append(category_id) category_names.append(target['category'].encode('utf8')) area.append(target['bbox_area']) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(category_names), 'image/object/area': dataset_util.float_list_feature(area), } example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return key, example, num_annotations_skipped, total_targets
def create_label_tfrecord(images_path): ''' Create label list and TFRecords for Object Detection API ''' (_, folders, files) = next(os.walk(images_path)) if "nologo" in folders: folders.remove("nologo") if len(folders) != 0: for subfolder in folders: sub_path = os.path.join(images_path, subfolder) create_label_tfrecord(sub_path) else: LABELS_FILE = "metadata.txt" # open metadata and load it into memory global xmin, ymin, xmax, ymax, label, filename labels_file_path = os.path.join(images_path, LABELS_FILE) for filename in glob.glob(labels_file_path): with open(filename, 'rb') as f: metaDataLine = list(f)[-1] metavalues = metaDataLine.split(b',') label = metavalues[0] xmin = int(metavalues[1]) ymin = int(metavalues[2]) xmax = xmin + int(metavalues[3]) ymax = ymin + int(metavalues[4]) if LABELS_FILE in files: files.remove(LABELS_FILE) for file in files: if imghdr.what(os.path.join(images_path, file)) == 'jpeg': image_data = tf.gfile.FastGFile(os.path.join(images_path, file), 'rb').read() image_format = b'jpeg' height = IMAGE_HEIGHT width = IMAGE_WIDTH xmins = [] # List of normalized left x coordinates in bounding box (1 per box) xmins.append(float(xmin) / width) xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box) xmaxs.append(float(xmax) / width) ymins = [] # List of normalized top y coordinates in bounding box (1 per box) ymins.append(float(ymin) / height) ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box) ymaxs.append(float(ymax) / height) classes_text = [] # List of string class name of bounding box (1 per box) classes_text.append(label) classes = [] # List of integer class id of bounding box (1 per box) classes.append(labelDict.get(label)) tf_example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(filename.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(image_data), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) tfrecord_examples.append(tf_example) else: print('Not a JPEG, ignoring image')
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. Returns: example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] # if isinstance( image_id, unicode ): # import pdb # pdb.set_trace() full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] num_annotations_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_id = int(object_annotations['category_id']) category_ids.append(category_id) category_names.append( category_index[category_id]['name'].encode('utf8')) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects( object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 
'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(category_names), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) # if isinstance( image_id, unicode ): # import pdb # pdb.set_trace() example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return key, example, num_annotations_skipped
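A quick sanity check of the bbox conversion described in the docstring above, with made-up values; it only illustrates how an absolute COCO [x, y, width, height] box becomes the normalized corner coordinates stored in the feature dict:

# Illustration only: hypothetical image size and COCO-style box.
image_width, image_height = 640, 480
x, y, width, height = 100., 50., 200., 120.
xmin = x / image_width                 # 0.15625
xmax = (x + width) / image_width       # 0.46875
ymin = y / image_height                # ~0.1042
ymax = (y + height) / image_height     # ~0.3542
assert 0.0 <= xmin < xmax <= 1.0 and 0.0 <= ymin < ymax <= 1.0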
def dict_to_tf_example(data, mask_path, label_map_dict, image_subdirectory, ignore_difficult_instances=False, faces_only=True, mask_type='png'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) mask_path: String path to PNG encoded mask. label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). faces_only: If True, generates bounding boxes for pet faces. Otherwise generates bounding boxes (as well as segmentations for full pet bodies). mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to smaller file sizes. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() with tf.gfile.GFile(mask_path, 'rb') as fid: encoded_mask_png = fid.read() encoded_png_io = io.BytesIO(encoded_mask_png) mask = PIL.Image.open(encoded_png_io) if mask.format != 'PNG': raise ValueError('Mask format not PNG') mask_np = np.asarray(mask) nonbackground_indices_x = np.any(mask_np != 2, axis=0) nonbackground_indices_y = np.any(mask_np != 2, axis=1) nonzero_x_indices = np.where(nonbackground_indices_x) nonzero_y_indices = np.where(nonbackground_indices_y) width = int(data['size']['width']) height = int(data['size']['height']) xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] masks = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) if faces_only: xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) else: xmin = float(np.min(nonzero_x_indices)) xmax = float(np.max(nonzero_x_indices)) ymin = float(np.min(nonzero_y_indices)) ymax = float(np.max(nonzero_y_indices)) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) class_name = get_class_name_from_filename(data['filename']) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) if not faces_only: mask_remapped = (mask_np != 2).astype(np.uint8) masks.append(mask_remapped) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': 
dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } if not faces_only: if mask_type == 'numerical': mask_stack = np.stack(masks).astype(np.float32) masks_flattened = np.reshape(mask_stack, [-1]) feature_dict['image/object/mask'] = ( dataset_util.float_list_feature(masks_flattened.tolist())) elif mask_type == 'png': encoded_mask_png_list = [] for mask in masks: img = PIL.Image.fromarray(mask) output = io.BytesIO() img.save(output, format='PNG') encoded_mask_png_list.append(output.getvalue()) feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png_list)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example
def dict_to_tf_example(data, label_map_dict, img_path): with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = image.width height = image.height xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] for shape in data['Layers']['Layer']['Shapes']['Shape']: text = shape['BlockText']['Text'].text if not (text.startswith('Panel') or text.startswith('panel')): continue attrib = shape['Data']['Extent'].attrib x = float(attrib['X']) y = float(attrib['Y']) w = float(attrib['Width']) h = float(attrib['Height']) xmin = x xmax = x + w ymin = y ymax = y + h xmin /= width ymin /= height xmax /= width ymax /= height if xmin < 0 or ymin < 0 or xmax > 1.01 or ymax > 1.01: print(img_path) xmins.append(xmin) ymins.append(ymin) xmaxs.append(xmax) ymaxs.append(ymax) class_name = 'Panel' classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(img_path.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(img_path.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), } example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return example
def dict_to_tf_example(data, dataset_directory, label_map_dict, ignore_difficult_instances=False, image_subdirectory='JPEGImages'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) dataset_directory: Path to root directory holding PASCAL dataset label_map_dict: A map from string label names to integers ids. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). image_subdirectory: String specifying subdirectory within the PASCAL dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(data['folder'], image_subdirectory, data['filename']) full_path = os.path.join(dataset_directory, img_path) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature( data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), })) return example
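For context, a minimal driver sketch for the PASCAL-style converter above; the paths, label map file, and output filename are placeholders, and it assumes the TF1-era object_detection utilities are importable:

from lxml import etree
from object_detection.utils import dataset_util, label_map_util

def write_pascal_record(dataset_directory, xml_paths, label_map_path, output_path):
    # Parse each annotation XML, convert it, and append it to a single TFRecord file.
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    writer = tf.python_io.TFRecordWriter(output_path)
    for xml_path in xml_paths:
        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml = etree.fromstring(fid.read())
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        example = dict_to_tf_example(data, dataset_directory, label_map_dict)
        writer.write(example.SerializeToString())
    writer.close()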
classes_text.append(str(row['class']).encode('utf8')) classes.append(class_dict[str(row['class'])]) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return tf_example def class_dict_from_pbtxt(pbtxt_path): # open file, strip \n, trim lines and keep only # lines beginning with id or display_name with open(pbtxt_path, 'r', encoding='utf-8-sig') as f: data = f.readlines() name_key = None if any('display_name:' in s for s in data): name_key = 'display_name:' elif any('name:' in s for s in data):
def create_tf_example(image, annotations_list, image_dir, category_index, include_masks=False): """Converts image and annotations to a tf.Example proto. Args: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', u'width', u'date_captured', u'flickr_url', u'id'] annotations_list: list of dicts with keys: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box coordinates in the official COCO dataset are given as [x, y, width, height] tuples using absolute coordinates where x, y represent the top-left (0-indexed) corner. This function converts to the format expected by the Tensorflow Object Detection API (which is which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image size). image_dir: directory containing the image files. category_index: a dict containing COCO category information keyed by the 'id' field of each category. See the label_map_util.create_category_index function. include_masks: Whether to include instance segmentations masks (PNG encoded) in the result. default: False. Returns: example: The converted tf.Example num_annotations_skipped: Number of (invalid) annotations that were ignored. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ image_height = image['height'] image_width = image['width'] filename = image['file_name'] image_id = image['id'] full_path = os.path.join(image_dir, filename) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) key = hashlib.sha256(encoded_jpg).hexdigest() xmin = [] xmax = [] ymin = [] ymax = [] is_crowd = [] category_names = [] category_ids = [] area = [] encoded_mask_png = [] num_annotations_skipped = 0 for object_annotations in annotations_list: (x, y, width, height) = tuple(object_annotations['bbox']) if width <= 0 or height <= 0: num_annotations_skipped += 1 continue if x + width > image_width or y + height > image_height: num_annotations_skipped += 1 continue xmin.append(float(x) / image_width) xmax.append(float(x + width) / image_width) ymin.append(float(y) / image_height) ymax.append(float(y + height) / image_height) is_crowd.append(object_annotations['iscrowd']) category_id = int(object_annotations['category_id']) category_ids.append(category_id) category_names.append(category_index[category_id]['name'].encode('utf8')) area.append(object_annotations['area']) if include_masks: run_len_encoding = mask.frPyObjects(object_annotations['segmentation'], image_height, image_width) binary_mask = mask.decode(run_len_encoding) if not object_annotations['iscrowd']: binary_mask = np.amax(binary_mask, axis=2) pil_image = PIL.Image.fromarray(binary_mask) output_io = io.BytesIO() pil_image.save(output_io, format='PNG') encoded_mask_png.append(output_io.getvalue()) feature_dict = { 'image/height': dataset_util.int64_feature(image_height), 'image/width': dataset_util.int64_feature(image_width), 'image/filename': dataset_util.bytes_feature(filename.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/label': dataset_util.int64_list_feature(category_ids), 'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd), 'image/object/area': dataset_util.float_list_feature(area), } if include_masks: feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) return key, example, num_annotations_skipped
def dict_to_tf_example( data, dataset_directory, label_map_dict, ignore_difficult_instances=False, image_subdirectory="./" ): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) dataset_directory: Path to root directory holding PASCAL dataset label_map_dict: A map from string label names to integers ids. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). image_subdirectory: String specifying subdirectory within the PASCAL dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(data["folder"], image_subdirectory, data["filename"]) full_path = os.path.join(dataset_directory, img_path) with tf.gfile.GFile(full_path, "rb") as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != "JPEG": raise ValueError("Image format not JPEG") key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data["size"]["width"]) height = int(data["size"]["height"]) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] if "object" in data: for obj in data["object"]: difficult = bool(int(obj["difficult"])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin.append(float(obj["bndbox"]["xmin"]) / width) ymin.append(float(obj["bndbox"]["ymin"]) / height) xmax.append(float(obj["bndbox"]["xmax"]) / width) ymax.append(float(obj["bndbox"]["ymax"]) / height) classes_text.append(obj["name"].encode("utf8")) classes.append(label_map_dict[obj["name"]]) truncated.append(int(obj["truncated"])) poses.append(obj["pose"].encode("utf8")) example = tf.train.Example( features=tf.train.Features( feature={ "image/height": dataset_util.int64_feature(height), "image/width": dataset_util.int64_feature(width), "image/filename": dataset_util.bytes_feature(data["filename"].encode("utf8")), "image/source_id": dataset_util.bytes_feature(data["filename"].encode("utf8")), "image/key/sha256": dataset_util.bytes_feature(key.encode("utf8")), "image/encoded": dataset_util.bytes_feature(encoded_jpg), "image/format": dataset_util.bytes_feature("jpeg".encode("utf8")), "image/object/bbox/xmin": dataset_util.float_list_feature(xmin), "image/object/bbox/xmax": dataset_util.float_list_feature(xmax), "image/object/bbox/ymin": dataset_util.float_list_feature(ymin), "image/object/bbox/ymax": dataset_util.float_list_feature(ymax), "image/object/class/text": dataset_util.bytes_list_feature(classes_text), "image/object/class/label": dataset_util.int64_list_feature(classes), "image/object/difficult": dataset_util.int64_list_feature(difficult_obj), "image/object/truncated": dataset_util.int64_list_feature(truncated), "image/object/view": dataset_util.bytes_list_feature(poses), } ) ) return example
def dict_to_tf_example(data, label_map_dict, image_subdirectory): """Convert JSON derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') width, height = data['image_w_h'] # Image size xmins = [] # List of normalized left x coordinates in bounding box (1 per box) xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box) ymins = [] # List of normalized top y coordinates in bounding box (1 per box) ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box) classes_text = [] # List of string class name of bounding box (1 per box) classes = [] # List of integer class id of bounding box (1 per box) for box in data['objects']: x = float(box['x_y_w_h'][0]) y = float(box['x_y_w_h'][1]) w = float(box['x_y_w_h'][2]) h = float(box['x_y_w_h'][3]) xmins.append(float(x / width)) xmaxs.append(float((x + w) / width)) ymins.append(float(y / height)) ymaxs.append(float((y + h) / height)) class_name = box['label'] classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), })) return example
def dict_to_tf_example(data, image_dir, label_map_dict): """ This function normalizes the bounding box coordinates provided by the raw data. Arguments: :param data: dict holding XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) :param image_dir: Path to image director :param label_map_dict: A map from string label names to integers ids. :return: example: The converted tf.Example. """ full_path = os.path.join(image_dir, data['filename']) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] try: for obj in data['object']: xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) except KeyError: print(data['filename'] + ' without objects!') difficult_obj = [0] * len(classes) example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj) })) return example
def dict_to_tf_example(data, dataset_directory, set_name, label_map_dict, ignore_difficult_instances=False): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) dataset_directory: Path to root directory holding PASCAL dataset set_name: name of the set training, validation or test label_map_dict: A map from string label names to integers ids. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). image_subdirectory: String specifying subdirectory within the PASCAL dataset directory holding the actual image data. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(set_name, data['filename']) full_path = os.path.join(dataset_directory, img_path) with tf.gfile.GFile(full_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width = int(data['size']['width']) height = int(data['size']['height']) xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] if 'object' not in data: data['object'] = [] for obj in data['object']: if obj['name'] in label_map_dict: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) xmin.append(float(obj['bndbox']['xmin']) / width) ymin.append(float(obj['bndbox']['ymin']) / height) xmax.append(float(obj['bndbox']['xmax']) / width) ymax.append(float(obj['bndbox']['ymax']) / height) classes_text.append(obj['name'].encode('utf8')) classes.append(label_map_dict[obj['name']]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), })) return example
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map, encoded_image): """Populates a TF Example message with image annotations from a data frame. Args: annotations_data_frame: Data frame containing the annotations for a single image. label_map: String to integer label map. encoded_image: The encoded image string Returns: The populated TF Example, if the label of at least one object is present in label_map. Otherwise, returns None. """ filtered_data_frame = annotations_data_frame[ annotations_data_frame.LabelName.isin(label_map)] image_id = annotations_data_frame.ImageID.iloc[0] feature_map = { standard_fields.TfExampleFields.object_bbox_ymin: dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()), standard_fields.TfExampleFields.object_bbox_xmin: dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()), standard_fields.TfExampleFields.object_bbox_ymax: dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()), standard_fields.TfExampleFields.object_bbox_xmax: dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()), standard_fields.TfExampleFields.object_class_text: dataset_util.bytes_list_feature( filtered_data_frame.LabelName.as_matrix()), standard_fields.TfExampleFields.object_class_label: dataset_util.int64_list_feature( filtered_data_frame.LabelName.map(lambda x: label_map[x]) .as_matrix()), standard_fields.TfExampleFields.filename: dataset_util.bytes_feature('{}.jpg'.format(image_id)), standard_fields.TfExampleFields.source_id: dataset_util.bytes_feature(image_id), standard_fields.TfExampleFields.image_encoded: dataset_util.bytes_feature(encoded_image), } if 'IsGroupOf' in filtered_data_frame.columns: feature_map[standard_fields.TfExampleFields. object_group_of] = dataset_util.int64_list_feature( filtered_data_frame.IsGroupOf.as_matrix().astype(int)) if 'IsOccluded' in filtered_data_frame.columns: feature_map[standard_fields.TfExampleFields. object_occluded] = dataset_util.int64_list_feature( filtered_data_frame.IsOccluded.as_matrix().astype(int)) if 'IsTruncated' in filtered_data_frame.columns: feature_map[standard_fields.TfExampleFields. object_truncated] = dataset_util.int64_list_feature( filtered_data_frame.IsTruncated.as_matrix().astype(int)) if 'IsDepiction' in filtered_data_frame.columns: feature_map[standard_fields.TfExampleFields. object_depiction] = dataset_util.int64_list_feature( filtered_data_frame.IsDepiction.as_matrix().astype(int)) return tf.train.Example(features=tf.train.Features(feature=feature_map))
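A hedged usage sketch for the data-frame converter above, assuming an Open-Images-style CSV whose columns match the fields referenced in the function (ImageID, LabelName, XMin, ...); images_dir and the '.jpg' naming are illustrative assumptions:

import pandas as pd

def write_examples_from_csv(annotations_csv, images_dir, label_map, output_path):
    # Group the flat annotation table by image and emit one tf.Example per image.
    all_annotations = pd.read_csv(annotations_csv)
    writer = tf.python_io.TFRecordWriter(output_path)
    for image_id, image_annotations in all_annotations.groupby('ImageID'):
        image_path = os.path.join(images_dir, '{}.jpg'.format(image_id))
        with tf.gfile.GFile(image_path, 'rb') as fid:
            encoded_image = fid.read()
        example = tf_example_from_annotations_data_frame(
            image_annotations, label_map, encoded_image)
        if example is not None:
            writer.write(example.SerializeToString())
    writer.close()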
def dict_to_tf_example(example): filename = example['filename'] filename = filename.encode() with tf.io.gfile.GFile(example['filename'], 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) width, height = image.size if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() xmins = [] # normalized left x-coordinate ymins = [] # normalized top y-coordinate xmaxs = [] # normalized right x-coordinate ymaxs = [] # normalized bottom y-coordinate classes = [] # class id classes_text = [] # class name for box in example['annotations']: xmins.append(float(box['xmin'] / width)) xmaxs.append(float((box['xmin'] + box['x_width']) / width)) ymins.append(float(box['ymin'] / height)) ymaxs.append(float((box['ymin'] + box['y_height']) / height)) classes_text.append(box['class'].encode()) classes.append(int(LABELS_MAP[box['class']])) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(filename), 'image/source_id': dataset_util.bytes_feature(filename), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes) })) return tf_example
def dict_to_tf_example(data, mask_path, label_map_dict, image_subdirectory, ignore_difficult_instances=False, faces_only=True, mask_type='png'): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) mask_path: String path to PNG encoded mask. label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). faces_only: If True, generates bounding boxes for pet faces. Otherwise generates bounding boxes (as well as segmentations for full pet bodies). mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to smaller file sizes. Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, data['filename']) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() with tf.gfile.GFile(mask_path, 'rb') as fid: encoded_mask_png = fid.read() encoded_png_io = io.BytesIO(encoded_mask_png) mask = PIL.Image.open(encoded_png_io) if mask.format != 'PNG': raise ValueError('Mask format not PNG') mask_np = np.asarray(mask) nonbackground_indices_x = np.any(mask_np != 2, axis=0) nonbackground_indices_y = np.any(mask_np != 2, axis=1) nonzero_x_indices = np.where(nonbackground_indices_x) nonzero_y_indices = np.where(nonbackground_indices_y) width = int(data['size']['width']) height = int(data['size']['height']) xmins = [] ymins = [] xmaxs = [] ymaxs = [] classes = [] classes_text = [] truncated = [] poses = [] difficult_obj = [] masks = [] if 'object' in data: for obj in data['object']: difficult = bool(int(obj['difficult'])) if ignore_difficult_instances and difficult: continue difficult_obj.append(int(difficult)) if faces_only: xmin = float(obj['bndbox']['xmin']) xmax = float(obj['bndbox']['xmax']) ymin = float(obj['bndbox']['ymin']) ymax = float(obj['bndbox']['ymax']) else: xmin = float(np.min(nonzero_x_indices)) xmax = float(np.max(nonzero_x_indices)) ymin = float(np.min(nonzero_y_indices)) ymax = float(np.max(nonzero_y_indices)) xmins.append(xmin / width) ymins.append(ymin / height) xmaxs.append(xmax / width) ymaxs.append(ymax / height) class_name = get_class_name_from_filename(data['filename']) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) truncated.append(int(obj['truncated'])) poses.append(obj['pose'].encode('utf8')) if not faces_only: mask_remapped = (mask_np != 2).astype(np.uint8) masks.append(mask_remapped) feature_dict = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': 
dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.int64_list_feature(truncated), 'image/object/view': dataset_util.bytes_list_feature(poses), } if not faces_only: if mask_type == 'numerical': mask_stack = np.stack(masks).astype(np.float32) masks_flattened = np.reshape(mask_stack, [-1]) feature_dict['image/object/mask'] = ( dataset_util.float_list_feature(masks_flattened.tolist())) elif mask_type == 'png': encoded_mask_png_list = [] for mask in masks: img = PIL.Image.fromarray(mask) output = io.BytesIO() img.save(output, format='PNG') encoded_mask_png_list.append(output.getvalue()) feature_dict['image/object/mask'] = ( dataset_util.bytes_list_feature(encoded_mask_png_list)) example = tf.train.Example(features=tf.train.Features( feature=feature_dict)) return example
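A small sketch, for verification only, of decoding one of the PNG-encoded instance masks written by the function above back into a numpy array; the helper name is illustrative:

def decode_mask_png(encoded_mask_png):
    # Turn a single PNG byte string back into a 2-D array of 0/1 values
    # (the masks were remapped so that everything except label 2 is foreground).
    mask_image = PIL.Image.open(io.BytesIO(encoded_mask_png))
    return np.asarray(mask_image)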
def create_tf_example(self, number_of_dice: int, creation_seed: int): """Creates a labelled TFExample with the given number of dice at the corresponding creation seed.""" image, dice_bounding_boxes, dice_top_face_indices, dice_y_rotations, dot_boxes_list, camera_matrix = self.get_dice_boxes_indices_and_dot_boxes( number_of_dice, creation_seed) camera_intrinsic = camera_matrix.ravel() camera_distortion = np.zeros(5) dice_y_rotations = [ 0.000001 if dice_rot == 0 else dice_rot for dice_rot in dice_y_rotations ] # a rotation of exactly zero is a special label meaning 'no rotation data defined' all_dot_boxes = [ box for dot_boxes_per_dice in dot_boxes_list for box in dot_boxes_per_dice ] all_dot_classes = [0] * len(all_dot_boxes) if len(all_dot_boxes) > 0 else [] all_dot_y_rotations = [0] * len(all_dot_boxes) if len(all_dot_boxes) > 0 else [] all_y_rotations = dice_y_rotations + all_dot_y_rotations all_boxes = dice_bounding_boxes + all_dot_boxes xmins = [box[0] / self.image_width for box in all_boxes] ymins = [box[1] / self.image_height for box in all_boxes] xmaxs = [(box[0] + box[2]) / self.image_width for box in all_boxes] ymaxs = [(box[1] + box[3]) / self.image_height for box in all_boxes] all_classes_unshifted = dice_top_face_indices + all_dot_classes all_class_strings = [ str.encode(self.class_string_dictionary[class_id]) for class_id in all_classes_unshifted ] all_class_ids = [ id + 1 for id in all_classes_unshifted ] # shift by 1 because label maps for TF object detection start at 1 image_resized = tf.image.resize_with_pad( image, self.target_image_width, self.target_image_height, method=tf.image.ResizeMethod.BILINEAR, antialias=False) image_resized_normalized = image_resized / 255.0 image_resized_converted = tf.image.convert_image_dtype( image_resized_normalized, dtype=tf.uint16, saturate=False) encoded_image_data = tf.image.encode_png( image_resized_converted.numpy()) encoded_image_data_np = encoded_image_data.numpy() image_format = b'png' image_name = str.encode("image_{}_{}".format(number_of_dice, creation_seed)) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'image/height': dataset_util.int64_feature(self.image_height), 'image/width': dataset_util.int64_feature(self.image_width), 'image/filename': dataset_util.bytes_feature(image_name), 'image/source_id': dataset_util.bytes_feature(image_name), 'image/encoded': dataset_util.bytes_feature(encoded_image_data_np), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(all_class_strings), 'image/object/class/label': dataset_util.int64_list_feature(all_class_ids), 'image/object/rotation/y_angle': dataset_util.float_list_feature(all_y_rotations), 'image/camera/intrinsic': dataset_util.float_list_feature(camera_intrinsic), 'image/camera/distortion': dataset_util.float_list_feature(camera_distortion), })) return tf_example
def create_one_tf_example(data, class_map, path='', channels=4): ''' Creates a tf.Example proto from sample cat image. Args: data with a dictionnary containing path, width etc Returns: example: The created tf.Example. ''' height = data['height'] width = data['width'] filename = data['filepath'] format_img = data['format'] #with tf.gfile.GFile(path+filename[0], 'r') as fid: #print(fid.shape) #encoded_image_data = fid.read() # we consider that each image contain one channel for i in range(len(filename)): img_temp = cv2.imread(filename[i]) if img_temp is None: print('error loading img') temp = np.mean(img_temp, axis=-1) if temp is None: print("can't open image, check path parameters ! ") return temp = np.expand_dims(temp, axis=-1) if i == 0: img = temp else: img = np.concatenate([img, temp], axis=-1) #if img.shape != (1920, 1920, 4): # print(img.shape) img = img.astype(np.uint8) img_encoded = img.tostring() #filename = filename.encode('utf8') if format_img == 'png': image_format = b'png' elif format_img == 'jpg': image_format = b'jpg' else: print('Error format retry') xmins = [] xmaxs = [] ymins = [] ymaxs = [] classes_text = [] classes = [] for box in data['bboxes']: xmins.append(box['xmin'] / float(width)) xmaxs.append(box['xmax'] / float(width)) ymins.append(box['ymin'] / float(height)) ymaxs.append(box['ymax'] / float(height)) classes_text.append(box['class'].encode('utf8')) classes.append(class_map[box['class']]) # with tf.gfile.GFile(os.path.join(path, '{}'.format(filename)), 'rb') as fid: #print(filename) #print(path+filename) #print(cv2.imread((path+filename).replace('\\','/'))) feat = { 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), #'image/filename': dataset_util.bytes_feature(filename), #'image/source_id': dataset_util.bytes_feature(filename), 'image/channels': dataset_util.int64_feature(channels), 'image/encoded': dataset_util.bytes_feature(img_encoded), 'image/format': dataset_util.bytes_feature(image_format), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes) } tf_example = tf.train.Example(features=tf.train.Features(feature=feat)) return tf_example
def bbox_to_tf_example(bbox, label, image_file, label_map_dict, image_subdirectory, ignore_difficult_instances=False): """Convert XML derived dict to tf.Example proto. Notice that this function normalizes the bounding box coordinates provided by the raw data. Args: data: dict holding PASCAL XML fields for a single image (obtained by running dataset_util.recursive_parse_xml_to_dict) label_map_dict: A map from string label names to integers ids. image_subdirectory: String specifying subdirectory within the Pascal dataset directory holding the actual image data. ignore_difficult_instances: Whether to skip difficult instances in the dataset (default: False). Returns: example: The converted tf.Example. Raises: ValueError: if the image pointed to by data['filename'] is not a valid JPEG """ img_path = os.path.join(image_subdirectory, image_file) with tf.gfile.GFile(img_path, 'rb') as fid: encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) if image.format != 'JPEG': raise ValueError('Image format not JPEG') key = hashlib.sha256(encoded_jpg).hexdigest() width, height = image.size xmin = [] ymin = [] xmax = [] ymax = [] classes = [] classes_text = [] for b, l in zip(bbox, label): xmin.append(min(float(b[0]) / width, 1.0)) ymin.append(min(float(b[1]) / height, 1.0)) xmax.append(min(float(b[0]+b[2]) / width, 1.0)) ymax.append(min(float(b[1]+b[3]) / height, 1.0)) class_name = get_class_name_from_label(l) classes_text.append(class_name.encode('utf8')) classes.append(label_map_dict[class_name]) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature( image_file.encode('utf8')), 'image/source_id': dataset_util.bytes_feature( image_file.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax), 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 'image/object/class/label': dataset_util.int64_list_feature(classes) })) return example
def prepare_example(image_path, annotations, label_map_dict): """Converts a dictionary with annotations for an image to tf.Example proto. Args: image_path: The complete path to image. annotations: A dictionary representing the annotation of a single object that appears in the image. label_map_dict: A map from string label names to integer ids. Returns: example: The converted tf.Example. """ with tf.gfile.GFile(image_path, 'rb') as fid: encoded_png = fid.read() encoded_png_io = io.BytesIO(encoded_png) image = pil.open(encoded_png_io) image = np.asarray(image) key = hashlib.sha256(encoded_png).hexdigest() width = int(image.shape[1]) height = int(image.shape[0]) xmin_norm = annotations['2d_bbox_left'] / float(width) ymin_norm = annotations['2d_bbox_top'] / float(height) xmax_norm = annotations['2d_bbox_right'] / float(width) ymax_norm = annotations['2d_bbox_bottom'] / float(height) difficult_obj = [0]*len(xmin_norm) example = tf.train.Example(features=tf.train.Features(feature={ 'image/height': dataset_util.int64_feature(height), 'image/width': dataset_util.int64_feature(width), 'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')), 'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')), 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')), 'image/encoded': dataset_util.bytes_feature(encoded_png), 'image/format': dataset_util.bytes_feature('png'.encode('utf8')), 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm), 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm), 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm), 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm), 'image/object/class/text': dataset_util.bytes_list_feature( [x.encode('utf8') for x in annotations['type']]), 'image/object/class/label': dataset_util.int64_list_feature( [label_map_dict[x] for x in annotations['type']]), 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj), 'image/object/truncated': dataset_util.float_list_feature( annotations['truncated']), 'image/object/alpha': dataset_util.float_list_feature( annotations['alpha']), 'image/object/3d_bbox/height': dataset_util.float_list_feature( annotations['3d_bbox_height']), 'image/object/3d_bbox/width': dataset_util.float_list_feature( annotations['3d_bbox_width']), 'image/object/3d_bbox/length': dataset_util.float_list_feature( annotations['3d_bbox_length']), 'image/object/3d_bbox/x': dataset_util.float_list_feature( annotations['3d_bbox_x']), 'image/object/3d_bbox/y': dataset_util.float_list_feature( annotations['3d_bbox_y']), 'image/object/3d_bbox/z': dataset_util.float_list_feature( annotations['3d_bbox_z']), 'image/object/3d_bbox/rot_y': dataset_util.float_list_feature( annotations['3d_bbox_rot_y']), })) return example
def create_tf_example(example):
    # Udacity real data set
    height = 1096  # Image height
    width = 1368   # Image width

    filename = example['filename']  # Filename of the image. Empty if image is not from file
    filename = filename.encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode()

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class names of bounding box (1 per box)
    classes = []       # List of integer class ids of bounding box (1 per box)

    for box in example['annotations']:
        xmins.append(float(box['xmin'] / width))
        xmaxs.append(float((box['xmin'] + box['x_width']) / width))
        ymins.append(float(box['ymin'] / height))
        ymaxs.append(float((box['ymin'] + box['y_height']) / height))
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
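A hedged sketch of feeding this function. It assumes the annotations live in a YAML file whose entries carry the same keys the function reads (`filename`, `annotations`, `xmin`, `x_width`, `ymin`, `y_height`, `class`); the YAML path, output path, and the contents of `LABEL_DICT` are assumptions.

# Hypothetical call site for the Udacity-style annotations.
import yaml
import tensorflow as tf

LABEL_DICT = {'Green': 1, 'Red': 2, 'Yellow': 3}  # assumed mapping

with open('real_training_data.yaml') as f:
    examples = yaml.safe_load(f)

writer = tf.python_io.TFRecordWriter('udacity_train.record')
for ex in examples:
    writer.write(create_tf_example(ex).SerializeToString())
writer.close()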
def create_tf_record(output_filename, num_shards, examples):
    # Relies on module-level `read_bucket` (image directory) and
    # `annotations` (dict keyed by image filename) defined elsewhere.
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, example in enumerate(examples):
            img_path = os.path.join(read_bucket, example)
            if not os.path.isfile(img_path):
                continue
            with tf.gfile.GFile(img_path, 'rb') as fid:
                encoded_jpg = fid.read()
            encoded_jpg_io = io.BytesIO(encoded_jpg)
            image = PIL.Image.open(encoded_jpg_io)
            if image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            key = hashlib.sha256(encoded_jpg).hexdigest()
            width, height = image.size

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []  # 'coke', 'pepsi', 'coke'...
            classes = []       # 1, 2, 1...
            difficult_obj = []
            truncated = []
            poses = []

            for annotation in annotations[example]:
                xmins.append(annotation['x'])
                xmaxs.append(annotation['x2'])
                ymins.append(annotation['y'])
                ymaxs.append(annotation['y2'])
                classes_text.append(annotation['label'].encode('utf8'))
                classes.append(1)  # temporary, I need to assign labels to actual ids
                difficult_obj.append(0)
                truncated.append(0)
                poses.append(''.encode('utf8'))

            try:
                feature_dict = {
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/source_id': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label': dataset_util.int64_list_feature(classes),
                    'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                    'image/object/truncated': dataset_util.int64_list_feature(truncated),
                    'image/object/view': dataset_util.bytes_list_feature(poses),
                }
                tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                print('Invalid example, ignoring.')
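A hedged sketch of a call site for the sharded writer above. It assumes the module-level `annotations` dict mentioned in the function, and the 80/20 split, paths, and shard counts are illustrative choices only.

# Hypothetical train/val split driving create_tf_record().
import os
import random

image_files = list(annotations.keys())
random.seed(42)
random.shuffle(image_files)

num_train = int(0.8 * len(image_files))
create_tf_record(os.path.join('data', 'train.record'), num_shards=10,
                 examples=image_files[:num_train])
create_tf_record(os.path.join('data', 'val.record'), num_shards=2,
                 examples=image_files[num_train:])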
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       image_subdirectory='JPEGImages'):
    full_path = os.path.join(dataset_directory, image_subdirectory,
                             data['filename'] + '.png')
    if not os.path.isfile(full_path):
        full_path = os.path.join(dataset_directory, image_subdirectory,
                                 data['filename'] + '.jpg')
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format not in ['JPEG', 'PNG']:
        raise ValueError('Image format not JPEG or PNG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    if 'object' not in data.keys():
        print('No label detected in the xml file')
    else:
        for obj in data['object']:
            # `selected` (class names to keep) and `label_golden` (name -> id)
            # are expected to be defined at module level.
            if obj['name'] in selected:
                xmin.append(float(obj['bndbox']['xmin']) / width)
                ymin.append(float(obj['bndbox']['ymin']) / height)
                xmax.append(float(obj['bndbox']['xmax']) / width)
                ymax.append(float(obj['bndbox']['ymax']) / height)
                classes_text.append(obj['name'].encode('utf8'))
                classes.append(label_golden[obj['name']])

    # Record the actual on-disk encoding rather than always claiming 'jpeg'.
    image_format = 'png' if image.format == 'PNG' else 'jpeg'

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return example
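A sketch of producing the `data` dict this function expects, following the usual TF Object Detection API pattern of parsing a PASCAL VOC XML with `dataset_util.recursive_parse_xml_to_dict`. The XML path, dataset directory, and `label_map_dict` value are assumptions.

# Hypothetical PASCAL VOC XML -> dict -> tf.Example flow.
from lxml import etree
import tensorflow as tf
from object_detection.utils import dataset_util

with tf.gfile.GFile('annotations/xmls/example.xml', 'r') as fid:
    xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

example = dict_to_tf_example(data,
                             dataset_directory='VOCdevkit/VOC2012',
                             label_map_dict=label_map_dict)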
def create_tf_example(group, path, class_dict):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size
    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        # Prefer already-normalized columns if present; otherwise normalize
        # absolute pixel coordinates by the image size.
        if set(['xmin_rel', 'xmax_rel', 'ymin_rel', 'ymax_rel']).issubset(set(row.index)):
            xmin = row['xmin_rel']
            xmax = row['xmax_rel']
            ymin = row['ymin_rel']
            ymax = row['ymax_rel']
        elif set(['xmin', 'xmax', 'ymin', 'ymax']).issubset(set(row.index)):
            xmin = row['xmin'] / width
            xmax = row['xmax'] / width
            ymin = row['ymin'] / height
            ymax = row['ymax'] / height

        xmins.append(xmin)
        xmaxs.append(xmax)
        ymins.append(ymin)
        ymaxs.append(ymax)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_dict[row['class']])

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
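Functions in this CSV-driven style are usually paired with a small grouping helper that turns a labels DataFrame into one `group` per image. A minimal sketch, assuming a `train_labels.csv` with a `filename` column and a `class_dict` mapping class names to ids; those names are assumptions.

# Hypothetical grouping helper and call site.
from collections import namedtuple
import pandas as pd
import tensorflow as tf

def split(df, group_key):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group_key)
    return [data(filename, gb.get_group(x))
            for filename, x in zip(gb.groups.keys(), gb.groups)]

examples = pd.read_csv('train_labels.csv')
grouped = split(examples, 'filename')
writer = tf.python_io.TFRecordWriter('train.record')
for group in grouped:
    writer.write(create_tf_example(group, 'images', class_dict).SerializeToString())
writer.close()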
# Class-name -> integer-id mapping, replacing the original if/elif chain.
BLUEMOON_CLASSES = {
    'bluemoon_shouxi_fengqingbailai_500g': 1,
    'bluemoon_zhizun_suyalanxiang_660g': 2,
    'bluemoon_zhizun_suyalanxiang_box': 3,
    'bluemoon_zhizun_suyalanxiang_bag_600g': 4,
    'bluemoon_zhizun_qingyunmeixiang_box': 5,
    'bluemoon_zhizun_qingyunmeixiang_bag_600g': 6,
    'bluemoon_zhizun_qingyunmeixiang_660g': 7,
    'bluemoon_shouxi_shuangyong_fengqingbailai_500g': 8,
    'bluemoon_zhizun_new_suyalanxiang_660g': 9,
    'bluemoon_zhizun_new_qingyunmeixiang_660g': 10,
    'bluemoon_baobao_red_500g': 11,
    'bluemoon_jiecibao_500g': 12,
    'bluemoon_lanseyueguang_bai_600g': 13,
    'bluemoon_shenceng_500g': 14,
    'bluemoon_weinuo_500g': 15,
    'bluemoon_yurong_500g': 16,
    'bluemoon_ertongxishou_caomei_225g': 17,
    'bluemoon_ertongxishou_qingpingguo_225g': 18,
    'bluemoon_ertongxishou_tiancheng_225g': 19,
    'bluemoon_guopaoduoduo_300ml': 20,
    'bluemoon_xishouye_luhui_500g': 21,
    'bluemoon_xishouye_weie_500g': 22,
    'bluemoon_xishouye_yejuhua_500g': 23,
    'bluemoon_84xiaoduye_1.2kg': 24,
    'bluemoon_bolishui_500g': 25,
    'bluemoon_chaqing_500g': 26,
    'bluemoon_dibanqingjie_600g': 27,
    'bluemoon_piaobaishui_600g': 28,
    'bluemoon_quannengshui_500g': 29,
    'bluemoon_roushunji_500g': 30,
    'bluemoon_yilingjing_500g': 31,
    'bluemoon_yiwuxiaoduye_1kg': 32,
    'bluemoon_youwukexing_500g': 33,
    'bluemoon_liangbai_xunyicao_2kg': 34,
}


def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        # Image dimensions come from the CSV ('width'/'height' columns).
        xmins.append(row['xmin'] / row['width'])
        xmaxs.append(row['xmax'] / row['width'])
        ymins.append(row['ymin'] / row['height'])
        ymaxs.append(row['ymax'] / row['height'])
        classes_text.append(row['class'].encode('utf8'))
        # Unknown class names now raise KeyError instead of silently producing
        # mismatched text/label lists, as the original elif chain did.
        classes.append(BLUEMOON_CLASSES[row['class']])

    # `row` still refers to the last row of the group; all rows for one image
    # are expected to share the same width/height.
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(row['height']),
        'image/width': dataset_util.int64_feature(row['width']),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
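A quick sanity check that applies to any of the records written by the snippets above: iterate the output file and re-parse a few examples. The record path is an assumption, and the TF 1.x API is used to match the tf.gfile calls in these functions.

# Hypothetical verification loop over a written TFRecord.
import tensorflow as tf

for i, record in enumerate(tf.python_io.tf_record_iterator('train.record')):
    example = tf.train.Example()
    example.ParseFromString(record)
    feature = example.features.feature
    print(feature['image/filename'].bytes_list.value[0],
          len(feature['image/object/class/label'].int64_list.value), 'boxes')
    if i >= 4:  # only inspect the first few records
        break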