Example #1
    def testDecodeObjectGroupOf(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        object_group_of = [0, 1]
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/object/group_of':
                dataset_util.int64_list_feature(object_group_of),
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder()
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        self.assertAllEqual(
            (tensor_dict[fields.InputDataFields.groundtruth_group_of].
             get_shape().as_list()), [2])
        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)

        self.assertAllEqual(
            [bool(item) for item in object_group_of],
            tensor_dict[fields.InputDataFields.groundtruth_group_of])
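These decoder tests all call `_EncodeImage`/`_DecodeImage` helpers that this listing never shows. A minimal sketch of what they presumably look like, reconstructed from the call sites (TF 1.x, session-based; an assumption, not the original implementation):

    def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
        # Encode a uint8 HxWxC numpy array to jpeg or png bytes.
        with self.test_session():
            if encoding_type == 'jpeg':
                return tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
            elif encoding_type == 'png':
                return tf.image.encode_png(tf.constant(image_tensor)).eval()
            raise ValueError('Invalid encoding type.')

    def _DecodeImage(self, encoded, encoding_type='jpeg'):
        # Decode jpeg or png bytes back to a uint8 numpy array.
        with self.test_session():
            if encoding_type == 'jpeg':
                return tf.image.decode_jpeg(tf.constant(encoded)).eval()
            elif encoding_type == 'png':
                return tf.image.decode_png(tf.constant(encoded)).eval()
            raise ValueError('Invalid encoding type.')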
Example #2
    def testDecodeEmptyPngInstanceMasks(self):
        image_tensor = np.random.randint(256,
                                         size=(10, 10, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        encoded_masks = []
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature(b'jpeg'),
                'image/object/mask': dataset_util.bytes_list_feature(
                    encoded_masks),
                'image/height': dataset_util.int64_feature(10),
                'image/width': dataset_util.int64_feature(10),
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder(
            load_instance_masks=True,
            instance_mask_type=input_reader_pb2.PNG_MASKS)
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)
            self.assertAllEqual(
                tensor_dict[
                    fields.InputDataFields.groundtruth_instance_masks].shape,
                [0, 10, 10])
Example #3
    def testDecodeAdditionalChannels(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)

        additional_channel_tensor = np.random.randint(
            256, size=(4, 5, 1)).astype(np.uint8)
        encoded_additional_channel = self._EncodeImage(
            additional_channel_tensor)
        decoded_additional_channel = self._DecodeImage(
            encoded_additional_channel)

        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/additional_channels/encoded':
                dataset_util.bytes_list_feature([encoded_additional_channel] *
                                                2),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/source_id':
                dataset_util.bytes_feature(b'image_id'),
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder(
            num_additional_channels=2)
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)
            self.assertAllEqual(
                np.concatenate([decoded_additional_channel] * 2, axis=2),
                tensor_dict[fields.InputDataFields.image_additional_channels])
Example #4
    def create_tf_record(self):
        path = os.path.join(self.get_temp_dir(), 'tfrecord')
        writer = tf.python_io.TFRecordWriter(path)

        image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
        flat_mask = (4 * 5) * [1.0]
        with self.test_session():
            encoded_jpeg = tf.image.encode_jpeg(
                tf.constant(image_tensor)).eval()
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode(
                    'utf8')),
                'image/height': dataset_util.int64_feature(4),
                'image/width': dataset_util.int64_feature(5),
                'image/object/bbox/xmin': dataset_util.float_list_feature(
                    [0.0]),
                'image/object/bbox/xmax': dataset_util.float_list_feature(
                    [1.0]),
                'image/object/bbox/ymin': dataset_util.float_list_feature(
                    [0.0]),
                'image/object/bbox/ymax': dataset_util.float_list_feature(
                    [1.0]),
                'image/object/class/label': dataset_util.int64_list_feature(
                    [2]),
                'image/object/mask': dataset_util.float_list_feature(
                    flat_mask),
            }))
        writer.write(example.SerializeToString())
        writer.close()

        return path
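Every example here leans on the `dataset_util` feature wrappers. For self-containment, a sketch of those helpers as they exist in the Object Detection API's `dataset_util` module; each one simply wraps a scalar or list in the matching `tf.train.Feature` type:

def int64_feature(value):
    # Wrap a single integer in an Int64List feature.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def int64_list_feature(value):
    # Wrap a list of integers in an Int64List feature.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def bytes_feature(value):
    # Wrap a single bytes value in a BytesList feature.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def bytes_list_feature(value):
    # Wrap a list of bytes values in a BytesList feature.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

def float_list_feature(value):
    # Wrap a list of floats in a FloatList feature.
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))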
Example #5
    def testDecodeObjectWeight(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        object_weights = [0.75, 1.0]
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/object/weight':
                dataset_util.float_list_feature(object_weights),
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder()
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        self.assertAllEqual((tensor_dict[
            fields.InputDataFields.groundtruth_weights].get_shape().as_list()),
                            [None])
        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)

        self.assertAllEqual(
            object_weights,
            tensor_dict[fields.InputDataFields.groundtruth_weights])
Example #6
    def testDecodeJpegImage(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        decoded_jpeg = self._DecodeImage(encoded_jpeg)
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature(b'jpeg'),
                'image/source_id': dataset_util.bytes_feature(b'image_id'),
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder()
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        self.assertAllEqual(
            (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
            [None, None, 3])
        self.assertAllEqual(
            (tensor_dict[fields.InputDataFields.original_image_spatial_shape].
             get_shape().as_list()), [2])
        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)

        self.assertAllEqual(decoded_jpeg,
                            tensor_dict[fields.InputDataFields.image])
        self.assertAllEqual(
            [4, 5],
            tensor_dict[fields.InputDataFields.original_image_spatial_shape])
        self.assertEqual(b'image_id',
                         tensor_dict[fields.InputDataFields.source_id])
Example #7
    def testDecodePngInstanceMasks(self):
        image_tensor = np.random.randint(256,
                                         size=(10, 10, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
        mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
        encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png')
        decoded_png_1 = np.squeeze(mask_1.astype(np.float32))
        encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png')
        decoded_png_2 = np.squeeze(mask_2.astype(np.float32))
        encoded_masks = [encoded_png_1, encoded_png_2]
        decoded_masks = np.stack([decoded_png_1, decoded_png_2])
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature(b'jpeg'),
                'image/object/mask': dataset_util.bytes_list_feature(
                    encoded_masks)
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder(
            load_instance_masks=True,
            instance_mask_type=input_reader_pb2.PNG_MASKS)
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)

        self.assertAllEqual(
            decoded_masks,
            tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
Example #8
    def testDecodeInstanceSegmentation(self):
        num_instances = 4
        image_height = 5
        image_width = 3

        # Randomly generate image.
        image_tensor = np.random.randint(256,
                                         size=(image_height, image_width,
                                               3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)

        # Randomly generate instance segmentation masks.
        instance_masks = (np.random.randint(2,
                                            size=(num_instances, image_height,
                                                  image_width)).astype(
                                                      np.float32))
        instance_masks_flattened = np.reshape(instance_masks, [-1])

        # Randomly generate class labels for each instance.
        object_classes = np.random.randint(100, size=(num_instances)).astype(
            np.int64)

        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/height':
                dataset_util.int64_feature(image_height),
                'image/width':
                dataset_util.int64_feature(image_width),
                'image/object/mask':
                dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
        example_decoder = tf_example_decoder.TfExampleDecoder(
            load_instance_masks=True)
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        self.assertAllEqual(
            (tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
             get_shape().as_list()), [4, 5, 3])

        self.assertAllEqual((tensor_dict[
            fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
                            [4])

        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)

        self.assertAllEqual(
            instance_masks.astype(np.float32),
            tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
        self.assertAllEqual(
            object_classes,
            tensor_dict[fields.InputDataFields.groundtruth_classes])
Example #9
    def testDecodeKeypoint(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        bbox_ymins = [0.0, 4.0]
        bbox_xmins = [1.0, 5.0]
        bbox_ymaxs = [2.0, 6.0]
        bbox_xmaxs = [3.0, 7.0]
        keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
        keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(bbox_ymins),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(bbox_xmins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(bbox_ymaxs),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(bbox_xmaxs),
                'image/object/keypoint/y':
                dataset_util.float_list_feature(keypoint_ys),
                'image/object/keypoint/x':
                dataset_util.float_list_feature(keypoint_xs),
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        self.assertAllEqual((tensor_dict[
            fields.InputDataFields.groundtruth_boxes].get_shape().as_list()),
                            [None, 4])
        self.assertAllEqual(
            (tensor_dict[fields.InputDataFields.groundtruth_keypoints].
             get_shape().as_list()), [2, 3, 2])
        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)

        expected_boxes = np.vstack(
            [bbox_ymins, bbox_xmins, bbox_ymaxs, bbox_xmaxs]).transpose()
        self.assertAllEqual(
            expected_boxes,
            tensor_dict[fields.InputDataFields.groundtruth_boxes])
        self.assertAllEqual(
            2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])

        expected_keypoints = (np.vstack([keypoint_ys,
                                         keypoint_xs]).transpose().reshape(
                                             (2, 3, 2)))
        self.assertAllEqual(
            expected_keypoints,
            tensor_dict[fields.InputDataFields.groundtruth_keypoints])
Example #10
def create_tf_example(group):
    path = images_dir + '/' + group.filename
    with tf.gfile.GFile(path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    # NOTE: coordinates are divided by 2 because the image was resized to half size
    for index, row in group.object.iterrows():
        xmins.append((row['xmin'] / 2) / width)
        xmaxs.append((row['xmax'] / 2) / width)
        ymins.append((row['ymin'] / 2) / height)
        ymaxs.append((row['ymax'] / 2) / height)
        classes_text.append(row['sign_class'].encode('utf8'))
        classes.append(int(row['sign_id']))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
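In both `create_tf_example(group)` converters (this one and Example #11), `group` is typically a `(filename, object)` named tuple built by grouping a flat annotation CSV by filename. A common sketch of that plumbing, with assumed names since the original code isn't shown:

import pandas as pd
from collections import namedtuple

def split_annotations(examples_df, group_key='filename'):
    # Turn flat per-box rows into one (filename, rows) tuple per image.
    data = namedtuple('data', ['filename', 'object'])
    grouped = examples_df.groupby(group_key)
    return [data(filename, grouped.get_group(filename))
            for filename in grouped.groups]

Each element of the returned list then feeds one `create_tf_example(group)` call.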
Example #11
def create_tf_example(group):
    path = file_location(group.filename)
    with tf.gfile.GFile(path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['x_from'] / width)
        xmaxs.append((row['x_from'] + row['width']) / width)
        ymins.append(row['y_from'] / height)
        ymaxs.append((row['y_from'] + row['height']) / height)
        classes_text.append(row['sign_class'].encode('utf8'))
        classes.append(row['sign_class_id'])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Example #12
    def testDecodeImageLabels(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature(b'jpeg'),
                'image/class/label': dataset_util.int64_list_feature([1, 2]),
            })).SerializeToString()
        example_decoder = tf_example_decoder.TfExampleDecoder()
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)
        self.assertTrue(
            fields.InputDataFields.groundtruth_image_classes in tensor_dict)
        self.assertAllEqual(
            tensor_dict[fields.InputDataFields.groundtruth_image_classes],
            np.array([1, 2]))
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/class/text':
                dataset_util.bytes_list_feature([b'dog', b'cat']),
            })).SerializeToString()
        label_map_string = """
      item {
        id:3
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
        label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
        with tf.gfile.Open(label_map_path, 'wb') as f:
            f.write(label_map_string)
        example_decoder = tf_example_decoder.TfExampleDecoder(
            label_map_proto_file=label_map_path)
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
        with self.test_session() as sess:
            sess.run(tf.tables_initializer())
            tensor_dict = sess.run(tensor_dict)
        self.assertTrue(
            fields.InputDataFields.groundtruth_image_classes in tensor_dict)
        self.assertAllEqual(
            tensor_dict[fields.InputDataFields.groundtruth_image_classes],
            np.array([1, 3]))
Example #13
    def create_tf_record(self, has_additional_channels=False, num_examples=1):
        path = os.path.join(self.get_temp_dir(), 'tfrecord')
        writer = tf.python_io.TFRecordWriter(path)

        image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
        additional_channels_tensor = np.random.randint(
            255, size=(4, 5, 1)).astype(np.uint8)
        flat_mask = (4 * 5) * [1.0]
        with self.test_session():
            encoded_jpeg = tf.image.encode_jpeg(
                tf.constant(image_tensor)).eval()
            encoded_additional_channels_jpeg = tf.image.encode_jpeg(
                tf.constant(additional_channels_tensor)).eval()
            for i in range(num_examples):
                features = {
                    'image/source_id':
                    dataset_util.bytes_feature(str(i).encode()),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                    'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/height':
                    dataset_util.int64_feature(4),
                    'image/width':
                    dataset_util.int64_feature(5),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature([0.0]),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature([1.0]),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature([0.0]),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature([1.0]),
                    'image/object/class/label':
                    dataset_util.int64_list_feature([2]),
                    'image/object/mask':
                    dataset_util.float_list_feature(flat_mask),
                }
                if has_additional_channels:
                    additional_channels_key = 'image/additional_channels/encoded'
                    features[
                        additional_channels_key] = dataset_util.bytes_list_feature(
                            [encoded_additional_channels_jpeg] * 2)
                example = tf.train.Example(features=tf.train.Features(
                    feature=features))
                writer.write(example.SerializeToString())
            writer.close()

        return path
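To sanity-check what either `create_tf_record` helper wrote, the record can be read back with the TF 1.x record iterator; a minimal sketch (`path` is the helper's return value):

def read_first_example(path):
    # Parse the first serialized tf.train.Example in a TFRecord file.
    record_iterator = tf.python_io.tf_record_iterator(path)
    example = tf.train.Example()
    example.ParseFromString(next(record_iterator))
    return example

example = read_first_example(path)
print(example.features.feature['image/height'].int64_list.value)  # [4]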
Example #14
    def testDecodeImageKeyAndFilename(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/key/sha256': dataset_util.bytes_feature(b'abc'),
                'image/filename': dataset_util.bytes_feature(b'filename')
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder()
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)

        self.assertEqual(b'abc', tensor_dict[fields.InputDataFields.key])
        self.assertEqual(b'filename',
                         tensor_dict[fields.InputDataFields.filename])
Example #15
    def testDecodeObjectLabelNoText(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        bbox_classes = [1, 2]
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/object/class/label':
                dataset_util.int64_list_feature(bbox_classes),
            })).SerializeToString()
        label_map_string = """
      item {
        id:1
        name:'cat'
      }
      item {
        id:2
        name:'dog'
      }
    """
        label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
        with tf.gfile.Open(label_map_path, 'wb') as f:
            f.write(label_map_string)

        example_decoder = tf_example_decoder.TfExampleDecoder(
            label_map_proto_file=label_map_path)
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        self.assertAllEqual((tensor_dict[
            fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
                            [None])

        init = tf.tables_initializer()
        with self.test_session() as sess:
            sess.run(init)
            tensor_dict = sess.run(tensor_dict)

        self.assertAllEqual(
            bbox_classes,
            tensor_dict[fields.InputDataFields.groundtruth_classes])
Example #16
    def testInstancesNotAvailableByDefault(self):
        num_instances = 4
        image_height = 5
        image_width = 3
        # Randomly generate image.
        image_tensor = np.random.randint(256,
                                         size=(image_height, image_width,
                                               3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)

        # Randomly generate instance segmentation masks.
        instance_masks = (np.random.randint(2,
                                            size=(num_instances, image_height,
                                                  image_width)).astype(
                                                      np.float32))
        instance_masks_flattened = np.reshape(instance_masks, [-1])

        # Randomly generate class labels for each instance.
        object_classes = np.random.randint(100, size=(num_instances)).astype(
            np.int64)

        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/height':
                dataset_util.int64_feature(image_height),
                'image/width':
                dataset_util.int64_feature(image_width),
                'image/object/mask':
                dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
        example_decoder = tf_example_decoder.TfExampleDecoder()
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
        self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
                        not in tensor_dict)
Example #17
    def testDecodeDefaultGroundtruthWeights(self):
        image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        bbox_ymins = [0.0, 4.0]
        bbox_xmins = [1.0, 5.0]
        bbox_ymaxs = [2.0, 6.0]
        bbox_xmaxs = [3.0, 7.0]
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded':
                dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                dataset_util.bytes_feature(b'jpeg'),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(bbox_ymins),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(bbox_xmins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(bbox_ymaxs),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(bbox_xmaxs),
            })).SerializeToString()

        example_decoder = tf_example_decoder.TfExampleDecoder()
        tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

        self.assertAllEqual((tensor_dict[
            fields.InputDataFields.groundtruth_boxes].get_shape().as_list()),
                            [None, 4])

        with self.test_session() as sess:
            tensor_dict = sess.run(tensor_dict)

        self.assertAllClose(
            tensor_dict[fields.InputDataFields.groundtruth_weights],
            np.ones(2, dtype=np.float32))
Example #18
def create_mock_tfrecord():
    pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8),
                                'RGB')
    image_output_stream = io.BytesIO()
    pil_image.save(image_output_stream, format='png')
    encoded_image = image_output_stream.getvalue()

    feature_map = {
        'test_field':
        dataset_util.float_list_feature([1, 2, 3, 4]),
        standard_fields.TfExampleFields.image_encoded:
        dataset_util.bytes_feature(encoded_image),
    }

    tf_example = tf.train.Example(features=tf.train.Features(
        feature=feature_map))
    with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
        writer.write(tf_example.SerializeToString())
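`get_mock_tfrecord_path` is not part of this listing; it presumably just returns a fixed temporary path. A plausible stand-in (an assumption, not the original helper):

import os
import tempfile

def get_mock_tfrecord_path():
    # Assumed helper: a stable location for the one-example mock record.
    return os.path.join(tempfile.gettempdir(), 'mock.tfrec')

Note the image is PNG-encoded but stored without an 'image/format' feature; the mock only exercises readers that look at `image_encoded`.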
Example #19
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image.
    label_map: String to integer label map.
    encoded_image: The encoded image string.

  Returns:
    The populated TF Example, if the label of at least one object is present in
    label_map. Otherwise, returns None.
  """

    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]
    filtered_data_frame_boxes = filtered_data_frame[
        ~filtered_data_frame.YMin.isnull()]
    filtered_data_frame_labels = filtered_data_frame[
        filtered_data_frame.YMin.isnull()]
    image_id = annotations_data_frame.ImageID.iloc[0]

    feature_map = {
        standard_fields.TfExampleFields.object_bbox_ymin:
        dataset_util.float_list_feature(filtered_data_frame_boxes.YMin.values),
        standard_fields.TfExampleFields.object_bbox_xmin:
        dataset_util.float_list_feature(filtered_data_frame_boxes.XMin.values),
        standard_fields.TfExampleFields.object_bbox_ymax:
        dataset_util.float_list_feature(filtered_data_frame_boxes.YMax.values),
        standard_fields.TfExampleFields.object_bbox_xmax:
        dataset_util.float_list_feature(filtered_data_frame_boxes.XMax.values),
        standard_fields.TfExampleFields.object_class_text:
        dataset_util.bytes_list_feature(
            filtered_data_frame_boxes.LabelName.values),
        standard_fields.TfExampleFields.object_class_label:
        dataset_util.int64_list_feature(
            filtered_data_frame_boxes.LabelName.map(
                lambda x: label_map[x]).values),
        standard_fields.TfExampleFields.filename:
        dataset_util.bytes_feature('{}.jpg'.format(image_id).encode()),
        standard_fields.TfExampleFields.source_id:
        dataset_util.bytes_feature(image_id.encode()),
        standard_fields.TfExampleFields.image_encoded:
        dataset_util.bytes_feature(encoded_image.encode()),
    }

    if 'IsGroupOf' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_group_of] = dataset_util.int64_list_feature(
                        filtered_data_frame_boxes.IsGroupOf.values.astype(int))
    if 'IsOccluded' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_occluded] = dataset_util.int64_list_feature(
                        filtered_data_frame_boxes.IsOccluded.values.astype(
                            int))
    if 'IsTruncated' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_truncated] = dataset_util.int64_list_feature(
                        filtered_data_frame_boxes.IsTruncated.values.astype(
                            int))
    if 'IsDepiction' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.
                    object_depiction] = dataset_util.int64_list_feature(
                        filtered_data_frame_boxes.IsDepiction.values.astype(
                            int))

    if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
        feature_map[standard_fields.TfExampleFields.
                    image_class_label] = dataset_util.int64_list_feature(
                        filtered_data_frame_labels.LabelName.map(
                            lambda x: label_map[x]).values)
        feature_map[standard_fields.TfExampleFields.
                    image_class_text] = dataset_util.bytes_list_feature(
                        filtered_data_frame_labels.LabelName.values)
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
Example #20
def create_tf_example(
    image,
    image_dir,
    bbox_annotations=None,
    category_index=None,
    caption_annotations=None,
    include_masks=False,
    num_attributes=None,
):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
        u'width', u'date_captured', u'flickr_url', u'id']
      image_dir: directory containing the image files.
      bbox_annotations:
        list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
          u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
          coordinates in the official COCO dataset are given as [x, y, width,
          height] tuples using absolute coordinates where x, y represent the
          top-left (0-indexed) corner.  This function converts to the format
          expected by the Tensorflow Object Detection API (which is
          [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
          size).
      category_index: a dict containing COCO category information keyed by the
        'id' field of each category.  See the label_map_util.create_category_index
        function.
      caption_annotations:
        list of dict with keys: [u'id', u'image_id', u'str'].
      include_masks: Whether to include instance segmentation masks
        (PNG encoded) in the result. default: False.
      num_attributes: Total number of attribute classes; when set, a per-box
        multi-hot attribute matrix is serialized into the example.
        default: None.

    Returns:
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were ignored.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_height = image["height"]
    image_width = image["width"]
    filename = image["file_name"]
    image_id = image["id"]

    full_path = os.path.join(image_dir, filename)
    image = PIL.Image.open(full_path).convert("RGB")
    print(
        f"Resize image {filename}: ({image.width}, {image.height}) -> ({image_width}, {image_height})"
    )
    image = image.resize((image_width, image_height))
    with tempfile.NamedTemporaryFile("wb", suffix=".jpg") as f:
        image.save(f.name)
        with tf.io.gfile.GFile(f.name, "rb") as fid:
            encoded_jpg = fid.read()

    assert (
        image_width == image.width and image_height == image.height
    ), f"filename={filename}: label width={image_width}, height={image_height} but actual width={image.width}, height={image.height}"

    key = hashlib.sha256(encoded_jpg).hexdigest()
    # Hashing the image id can't be used here.
    # c.f., https://github.com/tensorflow/tpu/issues/917
    # image_id = hash_image_id(image_id)
    feature_dict = {
        "image/height":
        dataset_util.int64_feature(image_height),
        "image/width":
        dataset_util.int64_feature(image_width),
        "image/filename":
        dataset_util.bytes_feature(filename.encode("utf8")),
        # source_id must be integer string
        # c.f., https://github.com/tensorflow/tpu/issues/516
    # c.f., process_source_id in tf_tpu_models/official/detection/utils/dataloader_utils.py
        "image/source_id":
        dataset_util.bytes_feature(str(image_id).encode("utf-8")),
        "image/key/sha256":
        dataset_util.bytes_feature(key.encode("utf8")),
        "image/encoded":
        dataset_util.bytes_feature(encoded_jpg),
        "image/format":
        dataset_util.bytes_feature("jpeg".encode("utf8")),
    }

    num_annotations_skipped = 0
    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        attributes_multi_hot = (np.zeros(
            (len(bbox_annotations),
             num_attributes), dtype=bool) if num_attributes else None)
        area = []
        encoded_mask_png = []
        for i, object_annotations in enumerate(bbox_annotations):
            (x, y, width, height) = tuple(object_annotations["bbox"])
            if width <= 0 or height <= 0:
                num_annotations_skipped += 1
                continue
            if x + width > image_width or y + height > image_height:
                num_annotations_skipped += 1
                continue
            xmin.append(float(x) / image_width)
            xmax.append(float(x + width) / image_width)
            ymin.append(float(y) / image_height)
            ymax.append(float(y + height) / image_height)
            is_crowd.append(object_annotations["iscrowd"])
            category_id = int(object_annotations["category_id"])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]["name"].encode("utf8"))
            area.append(object_annotations["area"])

            if include_masks:
                segmentation = object_annotations["segmentation"]
                if isinstance(segmentation, list):
                    if isinstance(segmentation[0], int):
                        binary_mask = _get_binary_mask(segmentation,
                                                       image_height,
                                                       image_width)
                    elif isinstance(segmentation[0], list):
                        run_len_encoding = mask.frPyObjects(
                            segmentation, image_height, image_width)
                        binary_mask = mask.decode(run_len_encoding)
                        if not object_annotations["iscrowd"] and (len(
                                binary_mask.shape) > 2):
                            binary_mask = np.amax(binary_mask, axis=2)
                elif (isinstance(segmentation, dict)
                      and "counts" in segmentation.keys()
                      and "size" in segmentation.keys()):
                    binary_mask = mask.decode(segmentation)
                    if not object_annotations["iscrowd"] and (len(
                            binary_mask.shape) > 2):
                        binary_mask = np.amax(binary_mask, axis=2)
                else:
                    raise ValueError(
                        f"not supported format annotation: {segmentation}")

                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format="PNG")
                encoded_mask_png.append(output_io.getvalue())

            if num_attributes:
                attributes_multi_hot[i,
                                     object_annotations["attribute_ids"]] = 1

        feature_dict.update({
            "image/object/bbox/xmin":
            dataset_util.float_list_feature(xmin),
            "image/object/bbox/xmax":
            dataset_util.float_list_feature(xmax),
            "image/object/bbox/ymin":
            dataset_util.float_list_feature(ymin),
            "image/object/bbox/ymax":
            dataset_util.float_list_feature(ymax),
            "image/object/class/text":
            dataset_util.bytes_list_feature(category_names),
            "image/object/class/label":
            dataset_util.int64_list_feature(category_ids),
            "image/object/attributes/labels":
            dataset_util.bytes_feature(attributes_multi_hot.tobytes()),
            "image/object/is_crowd":
            dataset_util.int64_list_feature(is_crowd),
            "image/object/area":
            dataset_util.float_list_feature(area),
        })
        if include_masks:
            feature_dict[
                "image/object/mask"] = dataset_util.bytes_list_feature(
                    encoded_mask_png)
    if caption_annotations:
        captions = []
        for caption_annotation in caption_annotations:
            captions.append(caption_annotation["caption"].encode("utf8"))
        feature_dict.update(
            {"image/caption": dataset_util.bytes_list_feature(captions)})

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
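Since the attribute matrix is serialized with raw `tobytes()`, the consumer has to reverse that by hand; a sketch of the round trip, assuming the writer-side boolean dtype and `num_attributes` are known to the reader:

import numpy as np

def decode_attributes(raw_bytes, num_attributes):
    # Inverse of attributes_multi_hot.tobytes(): recover the
    # (num_boxes, num_attributes) boolean matrix.
    flat = np.frombuffer(raw_bytes, dtype=bool)
    return flat.reshape(-1, num_attributes)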
Example #21
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset  (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    filename = data['filename']
    if isinstance(filename, bytes):
        filename = filename.decode('utf8')
    img_path = os.path.join(data['folder'], image_subdirectory, filename)
    full_path = os.path.join(dataset_directory, img_path)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue

            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(label_map_dict[obj['name']])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
            'image/object/view':
            dataset_util.bytes_list_feature(poses),
        }))
    return example
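`dict_to_tf_example` expects the dict produced by `dataset_util.recursive_parse_xml_to_dict`, as its docstring notes. A typical call site, sketched under the standard PASCAL VOC layout (paths here are placeholders):

from lxml import etree

annotation_path = '/path/to/VOCdevkit/VOC2012/Annotations/000001.xml'
with tf.gfile.GFile(annotation_path, 'r') as fid:
    xml = etree.fromstring(fid.read())
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
example = dict_to_tf_example(data, '/path/to/VOCdevkit', label_map_dict)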
Example #22
def create_tf_example(line):
    # TODO(user): Populate the following variables from your example.

    #filename = None # Filename of the image. Empty if image is not from file
    #encoded_image_data = None # Encoded image bytes

    image_format = b'jpeg'  # b'jpeg' or b'png'

    elements = line.split(' ')

    with open(os.path.join(FLAGS.data_root, elements[0]), 'rb') as f:
        encoded_image_data = f.read()
    filename = elements[0].split('/')[-1].replace('.jpg', '').encode('utf-8')
    source_id = filename
    key = hashlib.sha256(encoded_image_data).hexdigest().encode('utf8')

    xmins = []
    xmaxs = []

    ymins = []
    ymaxs = []

    classes_text = []
    classes = []

    # TODO: Find a way to convert the cv2 image to bytes so the image isn't
    # opened twice.
    img = cv2.imread(os.path.join(FLAGS.data_root, elements[0]), 0)
    height, width = img.shape[:2]

    for i in elements[1:]:

        _i = i.split(',')

        _xmins = int(_i[0]) / width
        _xmaxs = int(_i[2]) / width
        _ymins = int(_i[1]) / height
        _ymaxs = int(_i[3]) / height

        xmins.append(_xmins)
        xmaxs.append(_xmaxs)
        ymins.append(_ymins)
        ymaxs.append(_ymaxs)

        classes.append(int(_i[4]))
        # 'dict' here is a module-level {class_id: class_name} mapping that
        # unfortunately shadows the builtin.
        classes_text.append(dict[int(_i[4])].encode('utf-8'))

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename),
            'image/source_id':
            dataset_util.bytes_feature(filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
            'image/key/sha256':
            dataset_util.bytes_feature(key)
        }))
    return tf_example
Example #23
def create_tf_example(image_df, image2idx):
    """Converts the Open Images annotations for a single image to a tf.Example.

  Args:
    image_df: Data frame holding the annotation rows (ImageID, XMin, XMax,
      YMin, YMax, LabelName, IsGroupOf) of a single image.
    image2idx: Dict mapping an ImageID string to an integer source id.

  Returns:
    example: The converted tf.Example.
  """
    image_id = image_df.ImageID.values[0]

    # Settings for this converter: boxes are always exported, masks are not.
    bbox_annotations = True
    include_masks = False

    filename = image_id + '.jpg'

    full_path = os.path.join(FLAGS.image_dir, filename)
    if not os.path.exists(full_path):
        full_path = os.path.join(FLAGS.image_dir2, filename)

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    pil_image = PIL.Image.open(full_path)
    image_height = pil_image.height
    image_width = pil_image.width

    key = hashlib.sha256(encoded_jpg).hexdigest()

    feature_dict = {
        'image/height':
        dataset_util.int64_feature(image_height),
        'image/width':
        dataset_util.int64_feature(image_width),
        'image/filename':
        dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image2idx[image_id]).encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }

    # num_annotations_skipped = 0
    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        # encoded_mask_png = []

        for ann in image_df.itertuples():
            xmin.append(ann.XMin)
            xmax.append(ann.XMax)
            ymin.append(ann.YMin)
            ymax.append(ann.YMax)

            # is_crowd.append(object_annotations['iscrowd'])
            is_crowd.append(bool(ann.IsGroupOf))

            # category_id = int(object_annotations['category_id'])
            category_id = class_indices[ann.LabelName]
            # print(category_id)
            category_ids.append(category_id)

            category_name = class_labels[ann.LabelName].encode('utf8')
            # print(category_name)
            category_names.append(category_name)

            # area.append(object_annotations['area'])
            area.append(abs((ann.XMax - ann.XMin) * (ann.YMax - ann.YMin)))

            # if include_masks:
            #   run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
            #                                       image_height, image_width)
            #
            #   binary_mask = mask.decode(run_len_encoding)
            #
            #   if not object_annotations['iscrowd']:
            #     binary_mask = np.amax(binary_mask, axis=2)
            #
            #   pil_image = PIL.Image.fromarray(binary_mask)
            #   output_io = io.BytesIO()
            #   pil_image.save(output_io, format='PNG')
            #   encoded_mask_png.append(output_io.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
            'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
            'image/object/area':
            dataset_util.float_list_feature(area),
        })

        # if include_masks:
        #   feature_dict['image/object/mask'] = (
        #       dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example  # key, example, num_annotations_skipped
Example #24
def prepare_example(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

  Args:
    image_path: The complete path to image.
    annotations: A dictionary representing the annotation of a single object
      that appears in the image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    image = np.asarray(image)

    key = hashlib.sha256(encoded_png).hexdigest()

    width = int(image.shape[1])
    height = int(image.shape[0])

    xmin_norm = annotations['2d_bbox_left'] / float(width)
    ymin_norm = annotations['2d_bbox_top'] / float(height)
    xmax_norm = annotations['2d_bbox_right'] / float(width)
    ymax_norm = annotations['2d_bbox_bottom'] / float(height)

    difficult_obj = [0] * len(xmin_norm)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_png),
            'image/format':
            dataset_util.bytes_feature('png'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin_norm),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax_norm),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin_norm),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax_norm),
            'image/object/class/text':
            dataset_util.bytes_list_feature(
                [x.encode('utf8') for x in annotations['type']]),
            'image/object/class/label':
            dataset_util.int64_list_feature(
                [label_map_dict[x] for x in annotations['type']]),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
            'image/object/truncated':
            dataset_util.float_list_feature(annotations['truncated']),
            'image/object/alpha':
            dataset_util.float_list_feature(annotations['alpha']),
            'image/object/3d_bbox/height':
            dataset_util.float_list_feature(annotations['3d_bbox_height']),
            'image/object/3d_bbox/width':
            dataset_util.float_list_feature(annotations['3d_bbox_width']),
            'image/object/3d_bbox/length':
            dataset_util.float_list_feature(annotations['3d_bbox_length']),
            'image/object/3d_bbox/x':
            dataset_util.float_list_feature(annotations['3d_bbox_x']),
            'image/object/3d_bbox/y':
            dataset_util.float_list_feature(annotations['3d_bbox_y']),
            'image/object/3d_bbox/z':
            dataset_util.float_list_feature(annotations['3d_bbox_z']),
            'image/object/3d_bbox/rot_y':
            dataset_util.float_list_feature(annotations['3d_bbox_rot_y']),
        }))

    return example
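A minimal usage sketch for prepare_example follows. Everything in it is fabricated for illustration (the path, the label map, and the annotation values are not from a real KITTI export); a caller is expected to fill each annotation field with one array entry per object.

import numpy as np

# Hypothetical label map and single-object annotation dict; every value
# below is made up.
label_map_dict = {'car': 1, 'pedestrian': 2}
annotations = {
    'type': ['car'],
    '2d_bbox_left': np.array([100.0]),
    '2d_bbox_top': np.array([120.0]),
    '2d_bbox_right': np.array([300.0]),
    '2d_bbox_bottom': np.array([250.0]),
    'truncated': np.array([0.0]),
    'alpha': np.array([-1.57]),
    '3d_bbox_height': np.array([1.5]),
    '3d_bbox_width': np.array([1.6]),
    '3d_bbox_length': np.array([3.9]),
    '3d_bbox_x': np.array([1.0]),
    '3d_bbox_y': np.array([1.5]),
    '3d_bbox_z': np.array([20.0]),
    '3d_bbox_rot_y': np.array([-1.52]),
}
# The image path is a placeholder; prepare_example reads and hashes the file.
example = prepare_example('/data/kitti/image_2/000000.png',
                          annotations, label_map_dict)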
Example #25
0
def prepare_tfexample(image_path, annotations, label_map_dict):
    """Converts a dictionary with annotations for an image to tf.Example proto.

  Args:
    image_path: The complete path to the image.
    annotations: A dictionary whose values are arrays with one entry per
      object ('xmin', 'ymin', 'xmax', 'ymax' and 'class').
    label_map_dict: A map from string label names to integer ids (unused;
      the ids here are derived from sign_name_carolo_dict instead).

  Returns:
    example: The converted tf.Example.
  """
    image = pil.open(image_path)
    image = np.asarray(image)

    width = int(image.shape[1])
    height = int(image.shape[0])

    xmin_norm = annotations['xmin'] / float(width)
    ymin_norm = annotations['ymin'] / float(height)
    xmax_norm = annotations['xmax'] / float(width)
    ymax_norm = annotations['ymax'] / float(height)
    # Track the classes alongside the boxes so that dropping a broken box
    # below also drops its class entry and the feature lists stay aligned.
    classes = np.asarray(annotations['class'])

    swap_x = xmin_norm > xmax_norm
    if np.any(swap_x):
        logging.warning(
            'Image {}, xmin and xmax are swapped: {} - {} / {} - {}'.format(
                image_path, xmin_norm, xmax_norm, annotations['xmin'],
                annotations['xmax']))
        # Precompute the swap mask once. Recomputing it between the two
        # assignment targets (as in a[a > b], b[a > b] = b[a > b], a[a > b])
        # would leave the max side untouched, because the first assignment
        # changes the mask before the second target is evaluated.
        xmin_norm[swap_x], xmax_norm[swap_x] = (xmax_norm[swap_x],
                                                xmin_norm[swap_x])

    swap_y = ymin_norm > ymax_norm
    if np.any(swap_y):
        logging.warning(
            'Image {}, ymin and ymax are swapped: {} - {} / {} - {}'.format(
                image_path, ymin_norm, ymax_norm, annotations['ymin'],
                annotations['ymax']))
        ymin_norm[swap_y], ymax_norm[swap_y] = (ymax_norm[swap_y],
                                                ymin_norm[swap_y])

    if np.any(xmin_norm > 1.0) or np.any(xmin_norm < 0.0):
        logging.warning(
            'Image {}, x_min out of bounds: {} / {} - bound: {}'.format(
                image_path, xmin_norm, annotations['xmin'], width))
        # remove the box completely if the min is out of bounds on either
        # side; such an annotation is broken
        indices = (xmin_norm >= 0.0) & (xmin_norm < 1.0)
        xmin_norm = xmin_norm[indices]
        xmax_norm = xmax_norm[indices]
        ymin_norm = ymin_norm[indices]
        ymax_norm = ymax_norm[indices]
        classes = classes[indices]

    if np.any(xmax_norm > 1.0) or np.any(xmax_norm < 0.0):
        logging.warning(
            'Image {}, x_max out of bounds: {} / {} - bound: {}'.format(
                image_path, xmax_norm, annotations['xmax'], width))

        # clamp a max that runs past the image edge back down to 1.0
        xmax_norm[xmax_norm > 1.0] = 1.0

    if np.any(ymin_norm > 1.0) or np.any(ymin_norm < 0.0):
        logging.warning(
            'Image {}, y_min out of bounds: {} / {} - bound: {}'.format(
                image_path, ymin_norm, annotations['ymin'], height))

        # remove the box completely if the min is out of bounds on either
        # side; such an annotation is broken
        indices = (ymin_norm >= 0.0) & (ymin_norm < 1.0)
        ymin_norm = ymin_norm[indices]
        xmin_norm = xmin_norm[indices]
        ymax_norm = ymax_norm[indices]
        xmax_norm = xmax_norm[indices]
        classes = classes[indices]

    if np.any(ymax_norm > 1.0) or np.any(ymax_norm < 0.0):
        logging.warning(
            'Image {}, y_max out of bounds: {} / {} - bound: {}'.format(
                image_path, ymax_norm, annotations['ymax'], height))

        # clamp a max that runs past the image edge back down to 1.0
        ymax_norm[ymax_norm > 1.0] = 1.0

    # we ignore the "difficult object" labels for now
    difficult_obj = [0] * len(xmin_norm)

    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()
    key = hashlib.sha256(encoded_image).hexdigest()

    class_to_key_map = {
        name: idx + 1
        for idx, name in enumerate(sign_name_carolo_dict.keys())
    }

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(image_path.encode('utf8')),
            'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image),
            'image/format':
            dataset_util.bytes_feature('jpg'.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin_norm),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax_norm),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin_norm),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax_norm),
            'image/object/class/text':
            dataset_util.bytes_list_feature([
                sign_name_carolo_dict[x].encode('utf8')
                for x in classes
            ]),
            'image/object/class/label':
            dataset_util.int64_list_feature(
                [class_to_key_map[x] for x in classes]),
            'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        }))

    return example
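The swap-then-clamp handling of inverted or out-of-range box edges is easy to test in isolation. Below is a hedged, self-contained sketch of the same idea with a fabricated input; sanitize_boxes is an illustrative name and not part of the original script.

import numpy as np

def sanitize_boxes(xmin, xmax):
    """Swaps inverted box edges, then clamps both edges to [0.0, 1.0].

    A standalone sketch of the logic in prepare_tfexample; unlike the
    script, it does not drop fully out-of-bounds boxes.
    """
    xmin = np.asarray(xmin, dtype=np.float64).copy()
    xmax = np.asarray(xmax, dtype=np.float64).copy()
    swap = xmin > xmax  # precompute the mask once, then reuse it
    xmin[swap], xmax[swap] = xmax[swap], xmin[swap]
    return np.clip(xmin, 0.0, 1.0), np.clip(xmax, 0.0, 1.0)

# Fabricated input: the first box is inverted, the second runs past 1.0.
print(sanitize_boxes([0.9, 0.5], [0.2, 1.3]))
# -> (array([0.2, 0.5]), array([0.9, 1. ]))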
Example #26
0
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      caption_annotations=None,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    image_dir: directory containing the image files.
    bbox_annotations:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the TensorFlow Object Detection API (which
      is [ymin, xmin, ymax, xmax] with coordinates normalized relative to
      image size).
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    caption_annotations:
      list of dict with keys: [u'id', u'image_id', u'caption'].
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result. default: False.
  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by image['file_name'] is not a valid JPEG.
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    # PIL parses the image header here and raises if the bytes are not a
    # valid image; note this rebinds `image`, whose dict fields were read
    # into locals above.
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }

    num_annotations_skipped = 0
    if bbox_annotations:
        xmin = []
        xmax = []
        ymin = []
        ymax = []
        is_crowd = []
        category_names = []
        category_ids = []
        area = []
        encoded_mask_png = []
        for object_annotations in bbox_annotations:
            (x, y, width, height) = tuple(object_annotations['bbox'])
            if width <= 0 or height <= 0:
                num_annotations_skipped += 1
                continue
            if x + width > image_width or y + height > image_height:
                num_annotations_skipped += 1
                continue
            xmin.append(float(x) / image_width)
            xmax.append(float(x + width) / image_width)
            ymin.append(float(y) / image_height)
            ymax.append(float(y + height) / image_height)
            is_crowd.append(object_annotations['iscrowd'])
            category_id = int(object_annotations['category_id'])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]['name'].encode('utf8'))
            area.append(object_annotations['area'])

            if include_masks:
                run_len_encoding = mask.frPyObjects(
                    object_annotations['segmentation'], image_height,
                    image_width)
                binary_mask = mask.decode(run_len_encoding)
                if not object_annotations['iscrowd']:
                    binary_mask = np.amax(binary_mask, axis=2)
                pil_image = PIL.Image.fromarray(binary_mask)
                output_io = io.BytesIO()
                pil_image.save(output_io, format='PNG')
                encoded_mask_png.append(output_io.getvalue())
        feature_dict.update({
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
            'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
            'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
            'image/object/area':
            dataset_util.float_list_feature(area),
        })
        if include_masks:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png))
    if caption_annotations:
        captions = []
        for caption_annotation in caption_annotations:
            captions.append(caption_annotation['caption'].encode('utf8'))
        feature_dict.update(
            {'image/caption': dataset_util.bytes_list_feature(captions)})

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
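The coordinate conversion described in the docstring above is easy to verify by hand. Here is a tiny worked sketch with fabricated numbers (not taken from any real COCO annotation):

# COCO gives [x, y, width, height] in absolute pixels; the TF Object
# Detection API wants normalized [ymin, xmin, ymax, xmax]. Made-up
# 640x480 image and made-up box:
image_width, image_height = 640, 480
x, y, width, height = 64.0, 48.0, 320.0, 240.0  # COCO-style bbox

xmin = x / image_width              # 0.1
xmax = (x + width) / image_width    # 0.6
ymin = y / image_height             # 0.1
ymax = (y + height) / image_height  # 0.6
print([ymin, xmin, ymax, xmax])     # [0.1, 0.1, 0.6, 0.6]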
Example #27
0
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying subdirectory within the
      Pascal dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_mask_png)
    mask = PIL.Image.open(encoded_png_io)
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')

    mask_np = np.asarray(mask)
    # In the pet trimap masks, pixel value 2 marks background, so anything
    # != 2 belongs to the pet.
    nonbackground_indices_x = np.any(mask_np != 2, axis=0)
    nonbackground_indices_y = np.any(mask_np != 2, axis=1)
    nonzero_x_indices = np.where(nonbackground_indices_x)
    nonzero_y_indices = np.where(nonbackground_indices_y)

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    masks = []
    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue
            difficult_obj.append(int(difficult))

            if faces_only:
                xmin = float(obj['bndbox']['xmin'])
                xmax = float(obj['bndbox']['xmax'])
                ymin = float(obj['bndbox']['ymin'])
                ymax = float(obj['bndbox']['ymax'])
            else:
                xmin = float(np.min(nonzero_x_indices))
                xmax = float(np.max(nonzero_x_indices))
                ymin = float(np.min(nonzero_y_indices))
                ymax = float(np.max(nonzero_y_indices))

            xmins.append(xmin / width)
            ymins.append(ymin / height)
            xmaxs.append(xmax / width)
            ymaxs.append(ymax / height)
            class_name = get_class_name_from_filename(data['filename'])
            classes_text.append(class_name.encode('utf8'))
            classes.append(label_map_dict[class_name])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))
            if not faces_only:
                mask_remapped = (mask_np != 2).astype(np.uint8)
                masks.append(mask_remapped)

    feature_dict = {
        'image/height':
        dataset_util.int64_feature(height),
        'image/width':
        dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
        dataset_util.int64_list_feature(classes),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated),
        'image/object/view':
        dataset_util.bytes_list_feature(poses),
    }
    if not faces_only:
        if mask_type == 'numerical':
            mask_stack = np.stack(masks).astype(np.float32)
            masks_flattened = np.reshape(mask_stack, [-1])
            feature_dict['image/object/mask'] = (
                dataset_util.float_list_feature(masks_flattened.tolist()))
        elif mask_type == 'png':
            encoded_mask_png_list = []
            for instance_mask in masks:
                img = PIL.Image.fromarray(instance_mask)
                output = io.BytesIO()
                img.save(output, format='PNG')
                encoded_mask_png_list.append(output.getvalue())
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
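As a sanity check of the 'png' mask branch above, the hedged sketch below round-trips a fabricated binary mask through the same PIL encode/decode path; none of the values come from a real dataset.

import io

import numpy as np
import PIL.Image

# Fabricated 4x5 binary instance mask, encoded the same way as in
# dict_to_tf_example's 'png' branch, then decoded back to confirm the
# round trip is lossless.
binary_mask = np.zeros((4, 5), dtype=np.uint8)
binary_mask[1:3, 1:4] = 1

output = io.BytesIO()
PIL.Image.fromarray(binary_mask).save(output, format='PNG')
encoded = output.getvalue()

decoded = np.asarray(PIL.Image.open(io.BytesIO(encoded)))
assert np.array_equal(decoded, binary_mask)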