Exemple #1
0
  def testDecodeJpegImage(self):
    """Decodes a JPEG example and checks image, spatial shape and source id."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    expected_image = self._DecodeImage(jpeg_bytes)
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/source_id': dataset_util.bytes_feature(six.b('image_id')),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    image_key = fields.InputDataFields.image
    spatial_shape_key = fields.InputDataFields.original_image_spatial_shape

    # Static shapes, checked before anything is evaluated: the test expects
    # the image height and width to be unknown at graph-construction time.
    self.assertAllEqual(
        tensors[image_key].get_shape().as_list(), [None, None, 3])
    self.assertAllEqual(
        tensors[spatial_shape_key].get_shape().as_list(), [2])

    with self.test_session() as sess:
      outputs = sess.run(tensors)

    self.assertAllEqual(expected_image, outputs[image_key])
    self.assertAllEqual([4, 5], outputs[spatial_shape_key])
    self.assertEqual(six.b('image_id'),
                     outputs[fields.InputDataFields.source_id])
Exemple #2
0
  def testDecodeAdditionalChannels(self):
    """Decodes two extra encoded channels and checks their concatenation."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)

    channel_pixels = np.random.randint(256, size=(4, 5, 1)).astype(np.uint8)
    channel_bytes = self._EncodeImage(channel_pixels)
    expected_channel = self._DecodeImage(channel_bytes)

    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        # The same encoded channel is stored twice.
        'image/additional_channels/encoded':
            dataset_util.bytes_list_feature([channel_bytes, channel_bytes]),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/source_id': dataset_util.bytes_feature(six.b('image_id')),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(num_additional_channels=2)
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(tensors)
      # Both copies are expected side by side along the last axis.
      expected = np.concatenate([expected_channel, expected_channel], axis=2)
      self.assertAllEqual(
          expected, outputs[fields.InputDataFields.image_additional_channels])
Exemple #3
0
  def testDecodeObjectWeight(self):
    """Checks 'image/object/weight' is decoded as the groundtruth weights."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    expected_weights = [0.75, 1.0]
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/weight':
            dataset_util.float_list_feature(expected_weights),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    weights_key = fields.InputDataFields.groundtruth_weights
    # The static shape is checked before the tensors are evaluated.
    self.assertAllEqual(tensors[weights_key].get_shape().as_list(), [None])

    with self.test_session() as sess:
      outputs = sess.run(tensors)

    self.assertAllEqual(expected_weights, outputs[weights_key])
    def create_tf_record(self):
        """Writes a single-example TFRecord file and returns its path.

        The example holds a random 4x5 RGB image encoded as JPEG, its height
        and width, one box with xmin/ymin 0.0 and xmax/ymax 1.0, class label
        2, and a flattened mask of 4 * 5 ones.

        Returns:
            Path of the TFRecord file, located in the test's temp directory.
        """
        path = os.path.join(self.get_temp_dir(), 'tfrecord')

        image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
        flat_mask = (4 * 5) * [1.0]
        with self.test_session():
            encoded_jpeg = tf.image.encode_jpeg(
                tf.constant(image_tensor)).eval()
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
                'image/format': dataset_util.bytes_feature('jpeg'.encode(
                    'utf8')),
                'image/height': dataset_util.int64_feature(4),
                'image/width': dataset_util.int64_feature(5),
                'image/object/bbox/xmin': dataset_util.float_list_feature(
                    [0.0]),
                'image/object/bbox/xmax': dataset_util.float_list_feature(
                    [1.0]),
                'image/object/bbox/ymin': dataset_util.float_list_feature(
                    [0.0]),
                'image/object/bbox/ymax': dataset_util.float_list_feature(
                    [1.0]),
                'image/object/class/label': dataset_util.int64_list_feature(
                    [2]),
                'image/object/mask': dataset_util.float_list_feature(
                    flat_mask),
            }))

        # The context manager closes the writer even if write() raises, so
        # the file handle is never leaked.
        with tf.python_io.TFRecordWriter(path) as writer:
            writer.write(example.SerializeToString())

        return path
Exemple #5
0
  def testDecodeEmptyMultiClassScores(self):
    """Checks multiclass scores decode as empty when the example has none."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    ymins = [0.0, 4.0]
    xmins = [1.0, 5.0]
    ymaxs = [2.0, 6.0]
    xmaxs = [3.0, 7.0]
    # Boxes are present, but no multiclass score feature is written.
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(load_multiclass_scores=True)
    tensors = decoder.decode(tf.convert_to_tensor(serialized))
    with self.test_session() as sess:
      outputs = sess.run(tensors)

    scores = outputs[fields.InputDataFields.multiclass_scores]
    self.assertEqual(0, scores.size)
Exemple #6
0
  def testDecodeEmptyPngInstanceMasks(self):
    """Checks an empty PNG mask list decodes to a [0, 10, 10] mask array."""
    pixels = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    no_masks = []
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/mask': dataset_util.bytes_list_feature(no_masks),
        'image/height': dataset_util.int64_feature(10),
        'image/width': dataset_util.int64_feature(10),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True,
        instance_mask_type=input_reader_pb2.PNG_MASKS)
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(tensors)
      masks = outputs[fields.InputDataFields.groundtruth_instance_masks]
      self.assertAllEqual(masks.shape, [0, 10, 10])
Exemple #7
0
  def testDecodeDefaultGroundtruthWeights(self):
    """Checks weights are all ones when the example stores no weights."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    ymins = [0.0, 4.0]
    xmins = [1.0, 5.0]
    ymaxs = [2.0, 6.0]
    xmaxs = [3.0, 7.0]
    # Two boxes and no 'image/object/weight' feature.
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    boxes = tensors[fields.InputDataFields.groundtruth_boxes]
    self.assertAllEqual(boxes.get_shape().as_list(), [None, 4])

    with self.test_session() as sess:
      outputs = sess.run(tensors)

    # One weight per box is expected, each equal to 1.
    self.assertAllClose(outputs[fields.InputDataFields.groundtruth_weights],
                        np.ones(2, dtype=np.float32))
Exemple #8
0
  def testDecodePngInstanceMasks(self):
    """Checks two PNG-encoded masks decode to the original float masks."""
    pixels = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)

    # Two random binary masks, each of shape (10, 10, 1).
    raw_masks = [
        np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
        for _ in range(2)
    ]
    png_masks = [
        self._EncodeImage(mask, encoding_type='png') for mask in raw_masks
    ]
    # Expected output drops the channel axis and is float32.
    expected_masks = np.stack(
        [np.squeeze(mask.astype(np.float32)) for mask in raw_masks])

    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/mask': dataset_util.bytes_list_feature(png_masks),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True,
        instance_mask_type=input_reader_pb2.PNG_MASKS)
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(tensors)

    self.assertAllEqual(
        expected_masks,
        outputs[fields.InputDataFields.groundtruth_instance_masks])
Exemple #9
0
  def testDecodeObjectGroupOf(self):
    """Checks 'image/object/group_of' integers are decoded as booleans."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    group_of_flags = [0, 1]
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/group_of':
            dataset_util.int64_list_feature(group_of_flags),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    group_of_key = fields.InputDataFields.groundtruth_group_of
    self.assertAllEqual(tensors[group_of_key].get_shape().as_list(), [2])

    with self.test_session() as sess:
      outputs = sess.run(tensors)

    expected = [bool(flag) for flag in group_of_flags]
    self.assertAllEqual(expected, outputs[group_of_key])
Exemple #10
0
  def testDecodeInstanceSegmentation(self):
    """Decodes flattened float instance masks and per-instance class labels."""
    num_instances = 4
    image_height = 5
    image_width = 3

    # Random image.
    pixels = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)

    # Random binary masks, one per instance, stored flattened in the example.
    mask_shape = (num_instances, image_height, image_width)
    expected_masks = np.random.randint(2, size=mask_shape).astype(np.float32)
    flat_masks = np.reshape(expected_masks, [-1])

    # Random class label for each instance.
    expected_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/object/mask': dataset_util.float_list_feature(flat_masks),
        'image/object/class/label':
            dataset_util.int64_list_feature(expected_classes),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(load_instance_masks=True)
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    masks_key = fields.InputDataFields.groundtruth_instance_masks
    classes_key = fields.InputDataFields.groundtruth_classes

    # Static shapes, checked before evaluation.
    self.assertAllEqual(tensors[masks_key].get_shape().as_list(), [4, 5, 3])
    self.assertAllEqual(tensors[classes_key].get_shape().as_list(), [4])

    with self.test_session() as sess:
      outputs = sess.run(tensors)

    self.assertAllEqual(expected_masks.astype(np.float32), outputs[masks_key])
    self.assertAllEqual(expected_classes, outputs[classes_key])
Exemple #11
0
 def testDecodeImageLabels(self):
   """Checks image-level labels decode from integer ids and from text names.

   The first example stores the labels as 'image/class/label' integers. The
   second stores them as 'image/class/text' names and the decoder is given a
   label map file; the test expects the ids from that label map.
   """
   image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
   encoded_jpeg = self._EncodeImage(image_tensor)

   # Case 1: integer labels stored directly in the example.
   example = tf.train.Example(
       features=tf.train.Features(
           feature={
               'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
               'image/format': dataset_util.bytes_feature(six.b('jpeg')),
               'image/class/label': dataset_util.int64_list_feature([1, 2]),
           })).SerializeToString()
   example_decoder = tf_example_decoder.TfExampleDecoder()
   tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
   with self.test_session() as sess:
     tensor_dict = sess.run(tensor_dict)
   # assertIn reports the missing key and the container on failure, unlike
   # assertTrue(key in container).
   self.assertIn(
       fields.InputDataFields.groundtruth_image_classes, tensor_dict)
   self.assertAllEqual(
       tensor_dict[fields.InputDataFields.groundtruth_image_classes],
       np.array([1, 2]))

   # Case 2: text labels, resolved through a label map written to disk.
   example = tf.train.Example(
       features=tf.train.Features(
           feature={
               'image/encoded':
                   dataset_util.bytes_feature(encoded_jpeg),
               'image/format':
                   dataset_util.bytes_feature(six.b('jpeg')),
               'image/class/text':
                   dataset_util.bytes_list_feature(
                       [six.b('dog'), six.b('cat')]),
           })).SerializeToString()
   label_map_string = """
     item {
       id:3
       name:'cat'
     }
     item {
       id:1
       name:'dog'
     }
   """
   label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
   with tf.gfile.Open(label_map_path, 'wb') as f:
     f.write(label_map_string)
   example_decoder = tf_example_decoder.TfExampleDecoder(
       label_map_proto_file=label_map_path)
   tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
   with self.test_session() as sess:
     # Tables are initialized before the decoded tensors are evaluated.
     sess.run(tf.tables_initializer())
     tensor_dict = sess.run(tensor_dict)
   self.assertIn(
       fields.InputDataFields.groundtruth_image_classes, tensor_dict)
   # 'dog' -> 1 and 'cat' -> 3 according to the label map above.
   self.assertAllEqual(
       tensor_dict[fields.InputDataFields.groundtruth_image_classes],
       np.array([1, 3]))
Exemple #12
0
  def testDecodeKeypoint(self):
    """Decodes boxes and keypoints with a decoder built for 3 keypoints."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    ymins = [0.0, 4.0]
    xmins = [1.0, 5.0]
    ymaxs = [2.0, 6.0]
    xmaxs = [3.0, 7.0]
    # Six keypoints in total: three for each of the two boxes.
    ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
    xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/keypoint/y': dataset_util.float_list_feature(ys),
        'image/object/keypoint/x': dataset_util.float_list_feature(xs),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    boxes_key = fields.InputDataFields.groundtruth_boxes
    keypoints_key = fields.InputDataFields.groundtruth_keypoints

    # Static shapes, checked before evaluation.
    self.assertAllEqual(tensors[boxes_key].get_shape().as_list(), [None, 4])
    self.assertAllEqual(
        tensors[keypoints_key].get_shape().as_list(), [2, 3, 2])

    with self.test_session() as sess:
      outputs = sess.run(tensors)

    # One row per box, in [ymin, xmin, ymax, xmax] column order.
    box_columns = np.vstack([ymins, xmins, ymaxs, xmaxs])
    self.assertAllEqual(box_columns.transpose(), outputs[boxes_key])

    # (y, x) pairs, regrouped as [box, keypoint, coordinate].
    yx_pairs = np.vstack([ys, xs]).transpose()
    self.assertAllEqual(yx_pairs.reshape((2, 3, 2)), outputs[keypoints_key])
    def create_tf_record(self, has_additional_channels=False, num_examples=1):
        """Writes a TFRecord file of identical-image examples; returns its path.

        Every example holds the same random 4x5 RGB JPEG, its height and
        width, one box with xmin/ymin 0.0 and xmax/ymax 1.0, class label 2
        and a flattened mask of 4 * 5 ones. Examples differ only in
        'image/source_id', which is the example index.

        Args:
            has_additional_channels: If true, each example also stores two
                copies of a JPEG-encoded random 4x5x1 image under
                'image/additional_channels/encoded'.
            num_examples: Number of examples to write.

        Returns:
            Path of the TFRecord file, located in the test's temp directory.
        """
        path = os.path.join(self.get_temp_dir(), 'tfrecord')

        image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
        additional_channels_tensor = np.random.randint(
            255, size=(4, 5, 1)).astype(np.uint8)
        flat_mask = (4 * 5) * [1.0]
        with self.test_session():
            encoded_jpeg = tf.image.encode_jpeg(
                tf.constant(image_tensor)).eval()
            encoded_additional_channels_jpeg = tf.image.encode_jpeg(
                tf.constant(additional_channels_tensor)).eval()

        # The context manager closes the writer even if building or writing
        # an example raises.
        with tf.python_io.TFRecordWriter(path) as writer:
            for i in range(num_examples):
                features = {
                    # Bytes features need bytes, not text; encode the index
                    # the same way as the 'image/format' value below.
                    'image/source_id':
                    dataset_util.bytes_feature(str(i).encode('utf8')),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                    'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/height':
                    dataset_util.int64_feature(4),
                    'image/width':
                    dataset_util.int64_feature(5),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature([0.0]),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature([1.0]),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature([0.0]),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature([1.0]),
                    'image/object/class/label':
                    dataset_util.int64_list_feature([2]),
                    'image/object/mask':
                    dataset_util.float_list_feature(flat_mask),
                }
                if has_additional_channels:
                    additional_channels_key = 'image/additional_channels/encoded'
                    features[
                        additional_channels_key] = dataset_util.bytes_list_feature(
                            [encoded_additional_channels_jpeg] * 2)
                example = tf.train.Example(features=tf.train.Features(
                    feature=features))
                writer.write(example.SerializeToString())

        return path
Exemple #14
0
  def testDecodeObjectLabelWithText(self):
    """Checks label-map ids replace annotated ids when class text is present."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    class_names = [six.b('cat'), six.b('dog')]
    # The ids stored in the example are expected to be overridden by the ids
    # that the label map assigns to the class names.
    stored_class_ids = [3, 4]
    label_map_class_ids = [1, 2]
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(stored_class_ids),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    label_map_text = """
      item {
        id:1
        name:'cat'
      }
      item {
        id:2
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
      label_map_file.write(label_map_text)

    decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    table_init = tf.tables_initializer()
    with self.test_session() as sess:
      sess.run(table_init)
      outputs = sess.run(tensors)

    self.assertAllEqual(label_map_class_ids,
                        outputs[fields.InputDataFields.groundtruth_classes])
Exemple #15
0
  def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self):
    """Checks display-name lookup; a name not in the label map yields -1."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    # 'cat' is a display_name in the label map below; 'cheetah' is absent.
    class_names = [six.b('cat'), six.b('cheetah')]
    stored_class_ids = [5, 6]
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature(six.b('jpeg')),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(stored_class_ids),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    label_map_text = """
      item {
        name:'/m/cat'
        id:3
        display_name:'cat'
      }
      item {
        name:'/m/dog'
        id:1
        display_name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
      label_map_file.write(label_map_text)

    decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      outputs = sess.run(tensors)

    self.assertAllEqual([3, -1],
                        outputs[fields.InputDataFields.groundtruth_classes])
Exemple #16
0
  def testDecodeImageKeyAndFilename(self):
    """Checks the sha256 key and the filename are passed through unchanged."""
    pixels = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    jpeg_bytes = self._EncodeImage(pixels)
    expected_key = six.b('abc')
    expected_filename = six.b('filename')
    feature_map = {
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/key/sha256': dataset_util.bytes_feature(six.b('abc')),
        'image/filename': dataset_util.bytes_feature(six.b('filename')),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensors = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(tensors)

    self.assertEqual(expected_key, outputs[fields.InputDataFields.key])
    self.assertEqual(expected_filename,
                     outputs[fields.InputDataFields.filename])
Exemple #17
0
  def testInstancesNotAvailableByDefault(self):
    """Checks instance masks are absent unless the decoder is asked for them.

    The example contains flattened instance masks, but the decoder is built
    with its default arguments, so the decoded dict must not have the
    groundtruth_instance_masks key.
    """
    num_instances = 4
    image_height = 5
    image_width = 3
    # Randomly generate image.
    image_tensor = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)

    # Randomly generate instance segmentation masks.
    instance_masks = (
        np.random.randint(2, size=(num_instances, image_height,
                                   image_width)).astype(np.float32))
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
                    dataset_util.int64_feature(image_width),
                'image/object/mask':
                    dataset_util.float_list_feature(instance_masks_flattened),
                'image/object/class/label':
                    dataset_util.int64_list_feature(object_classes)
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    # assertNotIn reports the offending key and container on failure, unlike
    # assertTrue(key not in container).
    self.assertNotIn(
        fields.InputDataFields.groundtruth_instance_masks, tensor_dict)
Exemple #18
0
def create_mock_tfrecord():
    """Writes a one-example TFRecord file to get_mock_tfrecord_path().

    The example contains a float list under 'test_field' and a PNG-encoded
    1x1 RGB image under standard_fields.TfExampleFields.image_encoded.
    """
    # Imported locally so this function does not rely on a module-level
    # import of io being present.
    import io

    pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8),
                                'RGB')
    # PNG data is binary, so it is buffered as bytes. io.BytesIO exists on
    # both Python 2 and 3, whereas the StringIO module is Python 2 only.
    image_output_stream = io.BytesIO()
    pil_image.save(image_output_stream, format='png')
    encoded_image = image_output_stream.getvalue()

    feature_map = {
        'test_field':
        dataset_util.float_list_feature([1, 2, 3, 4]),
        standard_fields.TfExampleFields.image_encoded:
        dataset_util.bytes_feature(encoded_image),
    }

    tf_example = tf.train.Example(features=tf.train.Features(
        feature=feature_map))
    with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
        writer.write(tf_example.SerializeToString())
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Build a tf.Example proto from a parsed PASCAL XML annotation dict.

    Bounding box coordinates from the raw data are normalized: x values are
    divided by the annotated image width and y values by the annotated image
    height.

    Args:
        data: dict holding PASCAL XML fields for a single image (obtained by
            running dataset_util.recursive_parse_xml_to_dict).
        dataset_directory: Path to root directory holding PASCAL dataset.
        label_map_dict: A map from string label names to integer ids.
        ignore_difficult_instances: Whether to skip difficult instances in
            the dataset (default: False).
        image_subdirectory: String specifying subdirectory within the PASCAL
            dataset directory holding the actual image data.

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image pointed to by data['filename'] is not a
            valid JPEG.
    """
    relative_image_path = os.path.join(data['folder'], image_subdirectory,
                                       data['filename'])
    absolute_image_path = os.path.join(dataset_directory, relative_image_path)
    with tf.gfile.GFile(absolute_image_path, 'rb') as image_file:
        encoded_jpg = image_file.read()

    # Only the format of the decoded image is inspected; the raw bytes are
    # what gets stored in the example.
    if PIL.Image.open(io.BytesIO(encoded_jpg)).format != 'JPEG':
        raise ValueError('Image format not JPEG')
    sha256_key = hashlib.sha256(encoded_jpg).hexdigest()

    image_size = data['size']
    width = int(image_size['width'])
    height = int(image_size['height'])

    # Parallel per-object lists; index i in each list describes object i.
    xmins, ymins, xmaxs, ymaxs = [], [], [], []
    class_ids, class_names = [], []
    truncated_flags, view_names, difficult_flags = [], [], []

    annotated_objects = data['object'] if 'object' in data else []
    for obj in annotated_objects:
        is_difficult = bool(int(obj['difficult']))
        if is_difficult and ignore_difficult_instances:
            continue

        difficult_flags.append(int(is_difficult))

        box = obj['bndbox']
        xmins.append(float(box['xmin']) / width)
        ymins.append(float(box['ymin']) / height)
        xmaxs.append(float(box['xmax']) / width)
        ymaxs.append(float(box['ymax']) / height)

        name = obj['name']
        class_names.append(name.encode('utf8'))
        class_ids.append(label_map_dict[name])
        truncated_flags.append(int(obj['truncated']))
        view_names.append(obj['pose'].encode('utf8'))

    feature_map = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
            dataset_util.bytes_feature(sha256_key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_flags),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated_flags),
        'image/object/view': dataset_util.bytes_list_feature(view_names),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def prepare_example(image_path, annotations, label_map_dict):
    """Builds a tf.Example for one PNG image and its object annotations.

    Args:
      image_path: Path of the image file. It is read in binary mode and is
        also stored as both the filename and the source id of the example.
      annotations: Mapping from annotation field name to per-object values.
        The keys read here are '2d_bbox_left', '2d_bbox_top',
        '2d_bbox_right', '2d_bbox_bottom', 'type', 'truncated', 'alpha' and
        the seven '3d_bbox_*' fields.
        NOTE(review): the 2d box entries are divided by a float and passed
        to len(), so they are presumably numpy arrays - confirm with caller.
      label_map_dict: A map from string label names to integer ids.

    Returns:
      The populated tf.train.Example.
    """
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        png_bytes = image_file.read()

    # The image is decoded only to learn its pixel dimensions.
    pixels = np.asarray(pil.open(io.BytesIO(png_bytes)))
    sha256_hex = hashlib.sha256(png_bytes).hexdigest()

    img_width = int(pixels.shape[1])
    img_height = int(pixels.shape[0])

    # Scale absolute pixel coordinates into fractions of the image size.
    left = annotations['2d_bbox_left'] / float(img_width)
    top = annotations['2d_bbox_top'] / float(img_height)
    right = annotations['2d_bbox_right'] / float(img_width)
    bottom = annotations['2d_bbox_bottom'] / float(img_height)

    # Every object is written with a "difficult" flag of 0.
    not_difficult = [0] * len(left)
    path_bytes = image_path.encode('utf8')

    features = {
        'image/height': dataset_util.int64_feature(img_height),
        'image/width': dataset_util.int64_feature(img_width),
        'image/filename': dataset_util.bytes_feature(path_bytes),
        'image/source_id': dataset_util.bytes_feature(path_bytes),
        'image/key/sha256':
            dataset_util.bytes_feature(sha256_hex.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(png_bytes),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [name.encode('utf8') for name in annotations['type']]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [label_map_dict[name] for name in annotations['type']]),
        'image/object/difficult':
            dataset_util.int64_list_feature(not_difficult),
    }

    # The remaining annotation fields are copied through unchanged as float
    # lists; each pair is (feature key, annotation key).
    passthrough_floats = (
        ('image/object/truncated', 'truncated'),
        ('image/object/alpha', 'alpha'),
        ('image/object/3d_bbox/height', '3d_bbox_height'),
        ('image/object/3d_bbox/width', '3d_bbox_width'),
        ('image/object/3d_bbox/length', '3d_bbox_length'),
        ('image/object/3d_bbox/x', '3d_bbox_x'),
        ('image/object/3d_bbox/y', '3d_bbox_y'),
        ('image/object/3d_bbox/z', '3d_bbox_z'),
        ('image/object/3d_bbox/rot_y', '3d_bbox_rot_y'),
    )
    for feature_key, annotation_key in passthrough_floats:
        features[feature_key] = dataset_util.float_list_feature(
            annotations[annotation_key])

    return tf.train.Example(features=tf.train.Features(feature=features))
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
    """Populates a TF Example message with image annotations from a data frame.

    Rows whose LabelName is not in label_map are dropped. Of the remaining
    rows, those with a non-null YMin are written as box annotations and those
    with a null YMin as image-level labels.

    Args:
      annotations_data_frame: Data frame containing the annotations for a
        single image. It needs at least one row, because the image id is
        read from the first row's ImageID.
      label_map: String to integer label map.
      encoded_image: The encoded image string.

    Returns:
      The populated tf.train.Example. An example is always returned; when no
      row's label is present in label_map the per-object features are empty.
    """

    def _to_bytes(text):
        # Bytes features take byte strings, so encode text that is not
        # already bytes.
        return text if isinstance(text, bytes) else text.encode('utf8')

    filtered_data_frame = annotations_data_frame[
        annotations_data_frame.LabelName.isin(label_map)]
    has_box = ~filtered_data_frame.YMin.isnull()
    filtered_data_frame_boxes = filtered_data_frame[has_box]
    filtered_data_frame_labels = filtered_data_frame[
        filtered_data_frame.YMin.isnull()]
    image_id = annotations_data_frame.ImageID.iloc[0]

    # `.values` is used instead of the deprecated `as_matrix()`.
    feature_map = {
        standard_fields.TfExampleFields.object_bbox_ymin:
            dataset_util.float_list_feature(
                filtered_data_frame_boxes.YMin.values),
        standard_fields.TfExampleFields.object_bbox_xmin:
            dataset_util.float_list_feature(
                filtered_data_frame_boxes.XMin.values),
        standard_fields.TfExampleFields.object_bbox_ymax:
            dataset_util.float_list_feature(
                filtered_data_frame_boxes.YMax.values),
        standard_fields.TfExampleFields.object_bbox_xmax:
            dataset_util.float_list_feature(
                filtered_data_frame_boxes.XMax.values),
        standard_fields.TfExampleFields.object_class_text:
            dataset_util.bytes_list_feature([
                _to_bytes(name)
                for name in filtered_data_frame_boxes.LabelName.values
            ]),
        standard_fields.TfExampleFields.object_class_label:
            dataset_util.int64_list_feature(
                filtered_data_frame_boxes.LabelName.map(
                    lambda x: label_map[x]).values),
        standard_fields.TfExampleFields.filename:
            dataset_util.bytes_feature(
                _to_bytes('{}.jpg'.format(image_id))),
        standard_fields.TfExampleFields.source_id:
            dataset_util.bytes_feature(_to_bytes(image_id)),
        standard_fields.TfExampleFields.image_encoded:
            dataset_util.bytes_feature(encoded_image),
    }

    # Optional per-box flag columns are written only when present.
    if 'IsGroupOf' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.object_group_of] = (
            dataset_util.int64_list_feature(
                filtered_data_frame_boxes.IsGroupOf.values.astype(int)))
    if 'IsOccluded' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.object_occluded] = (
            dataset_util.int64_list_feature(
                filtered_data_frame_boxes.IsOccluded.values.astype(int)))
    if 'IsTruncated' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.object_truncated] = (
            dataset_util.int64_list_feature(
                filtered_data_frame_boxes.IsTruncated.values.astype(int)))
    if 'IsDepiction' in filtered_data_frame.columns:
        feature_map[standard_fields.TfExampleFields.object_depiction] = (
            dataset_util.int64_list_feature(
                filtered_data_frame_boxes.IsDepiction.values.astype(int)))

    if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
        feature_map[standard_fields.TfExampleFields.image_class_label] = (
            dataset_util.int64_list_feature(
                filtered_data_frame_labels.LabelName.map(
                    lambda x: label_map[x]).values))
        # No trailing comma after this call: a comma here would store a
        # 1-tuple in feature_map instead of the Feature itself.
        feature_map[standard_fields.TfExampleFields.image_class_text] = (
            dataset_util.bytes_list_feature([
                _to_bytes(name)
                for name in filtered_data_frame_labels.LabelName.values
            ]))
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
Exemple #22
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list:
        list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
        u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates
        where x, y represent the top-left (0-indexed) corner.  This function
        converts to the format expected by the Tensorflow Object Detection
        API (which is [ymin, xmin, ymax, xmax] with coordinates normalized
        relative to image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed
        by the 'id' field of each category.  See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.

    Returns:
      key: Hex SHA-256 digest of the encoded image bytes.
      example: The converted tf.Example.
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored: boxes with non-positive width or height, or boxes extending
        past the right or bottom edge of the image.

    Raises:
      Whatever PIL.Image.open raises for data it cannot open. No explicit
      JPEG format check is performed here.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Opening the bytes with PIL is kept so unreadable image data still
    # fails here. The result is not needed afterwards, so the `image`
    # argument is no longer rebound to it.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Skip degenerate boxes and boxes that leave the image.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            if not object_annotations['iscrowd']:
                # NOTE(review): presumably non-crowd segmentations decode to
                # one channel per polygon, merged here with a max over the
                # last axis - confirm against pycocotools.
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
        # The numeric category ids were collected above but never written;
        # store them alongside the class names.
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
    """Convert XML derived dict to tf.Example proto.

    Notice that this function normalizes the bounding box coordinates
    provided by the raw data.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict)
      mask_path: String path to PNG encoded mask.
      label_map_dict: A map from string label names to integers ids.
      image_subdirectory: String specifying subdirectory within the
        Pascal dataset directory holding the actual image data.
      ignore_difficult_instances: Whether to skip difficult instances in the
        dataset  (default: False).
      faces_only: If True, generates bounding boxes for pet faces from the
        XML 'bndbox' entries.  Otherwise the bounding box is the extent of
        the mask pixels whose value is not 2, and that mask is also written
        as a segmentation for each kept object.
      mask_type: 'numerical' or 'png'. 'png' is recommended because it leads
        to smaller file sizes. Any other value writes no mask feature.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid
        JPEG, or if the file at mask_path is not a PNG.
    """
    img_path = os.path.join(image_subdirectory, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    key = hashlib.sha256(encoded_jpg).hexdigest()

    with tf.gfile.GFile(mask_path, 'rb') as fid:
        encoded_mask_png = fid.read()
    encoded_png_io = io.BytesIO(encoded_mask_png)
    mask = PIL.Image.open(encoded_png_io)
    if mask.format != 'PNG':
        raise ValueError('Mask format not PNG')

    # Mask value 2 is treated as background; find the columns and rows that
    # contain at least one non-background pixel.
    mask_np = np.asarray(mask)
    nonbackground_indices_x = np.any(mask_np != 2, axis=0)
    nonbackground_indices_y = np.any(mask_np != 2, axis=1)
    nonzero_x_indices = np.where(nonbackground_indices_x)
    nonzero_y_indices = np.where(nonbackground_indices_y)

    # The binary mask does not depend on the object, so build it once
    # instead of once per object.
    mask_remapped = None
    if not faces_only:
        mask_remapped = (mask_np != 2).astype(np.uint8)

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []
    masks = []
    if 'object' in data:
        for obj in data['object']:
            difficult = bool(int(obj['difficult']))
            if ignore_difficult_instances and difficult:
                continue
            difficult_obj.append(int(difficult))

            if faces_only:
                xmin = float(obj['bndbox']['xmin'])
                xmax = float(obj['bndbox']['xmax'])
                ymin = float(obj['bndbox']['ymin'])
                ymax = float(obj['bndbox']['ymax'])
            else:
                # The box comes from the mask extent, so it is the same for
                # every object of this image.
                xmin = float(np.min(nonzero_x_indices))
                xmax = float(np.max(nonzero_x_indices))
                ymin = float(np.min(nonzero_y_indices))
                ymax = float(np.max(nonzero_y_indices))

            xmins.append(xmin / width)
            ymins.append(ymin / height)
            xmaxs.append(xmax / width)
            ymaxs.append(ymax / height)
            # The class is taken from the file name, not from the object.
            class_name = get_class_name_from_filename(data['filename'])
            classes_text.append(class_name.encode('utf8'))
            classes.append(label_map_dict[class_name])
            truncated.append(int(obj['truncated']))
            poses.append(obj['pose'].encode('utf8'))
            if not faces_only:
                masks.append(mask_remapped)

    feature_dict = {
        'image/height':
            dataset_util.int64_feature(height),
        'image/width':
            dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(data['filename'].encode('utf8')),
        'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
        'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated),
        'image/object/view':
            dataset_util.bytes_list_feature(poses),
    }
    if not faces_only:
        if mask_type == 'numerical':
            if masks:
                mask_stack = np.stack(masks).astype(np.float32)
                masks_flattened = np.reshape(mask_stack, [-1]).tolist()
            else:
                # np.stack raises on an empty list; with no kept objects
                # write an empty mask feature instead of failing.
                masks_flattened = []
            feature_dict['image/object/mask'] = (
                dataset_util.float_list_feature(masks_flattened))
        elif mask_type == 'png':
            encoded_mask_png_list = []
            # Named object_mask so the PIL `mask` image is not shadowed.
            for object_mask in masks:
                img = PIL.Image.fromarray(object_mask)
                output = io.BytesIO()
                img.save(output, format='PNG')
                encoded_mask_png_list.append(output.getvalue())
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png_list))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example