Beispiel #1
0
    def create_tf_record(self):
        """Write a one-example TFRecord file and return its path.

        The example contains a random 4x5x3 uint8 image (values in [0, 255))
        encoded as JPEG, the format string 'jpeg', a single bounding box with
        min 0.0 / max 1.0 on both axes, and the class label 2.

        Returns:
            Path of the 'tfrecord' file written under ``self.get_temp_dir()``.
        """
        path = os.path.join(self.get_temp_dir(), 'tfrecord')

        image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
        with self.test_session():
            encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
        example = example_pb2.Example(features=feature_pb2.Features(feature={
            'image/encoded': feature_pb2.Feature(
                bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
            'image/format': feature_pb2.Feature(
                bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
            'image/object/bbox/xmin': feature_pb2.Feature(
                float_list=feature_pb2.FloatList(value=[0.0])),
            'image/object/bbox/xmax': feature_pb2.Feature(
                float_list=feature_pb2.FloatList(value=[1.0])),
            'image/object/bbox/ymin': feature_pb2.Feature(
                float_list=feature_pb2.FloatList(value=[0.0])),
            'image/object/bbox/ymax': feature_pb2.Feature(
                float_list=feature_pb2.FloatList(value=[1.0])),
            'image/object/class/label': feature_pb2.Feature(
                int64_list=feature_pb2.Int64List(value=[2])),
        }))

        # Open the writer only once the payload is ready and let the context
        # manager close it, so a failure while encoding or writing cannot
        # leak the underlying file handle.
        with tf.python_io.TFRecordWriter(path) as writer:
            writer.write(example.SerializeToString())

        return path
Beispiel #2
0
    def create_tf_record(self):
        """Write a one-example TFRecord file under ``~/tmp`` and return its path.

        The example contains a random 4x5x3 uint8 image (values in [0, 255))
        encoded as JPEG, its format, height (4) and width (5), one bounding
        box with min 0.0 / max 1.0 on both axes, the class label 2 and a flat
        mask of 20 ones.

        Returns:
            Path of the 'tfrecord' file written in the 'tmp' directory below
            the user's home directory (created if missing).
        """
        print('\ncreate_tf_record')

        # HOME is preferred (as before); fall back to expanduser so an unset
        # HOME no longer raises KeyError.
        home_dir = os.environ.get('HOME') or os.path.expanduser('~')
        tmp_dir = os.path.join(home_dir, 'tmp')
        if not os.path.isdir(tmp_dir):
            os.makedirs(tmp_dir)

        path = os.path.join(tmp_dir, 'tfrecord')

        image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
        flat_mask = (4 * 5) * [1.0]
        with self.test_session():
            encoded_jpeg = tf.image.encode_jpeg(
                tf.constant(image_tensor)).eval()

        example = example_pb2.Example(features=feature_pb2.Features(
            feature={
                'image/encoded':
                feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                    value=[encoded_jpeg])),
                'image/format':
                feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                    value=['jpeg'.encode('utf-8')])),
                'image/height':
                feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                    value=[4])),
                'image/width':
                feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                    value=[5])),
                'image/object/bbox/xmin':
                feature_pb2.Feature(float_list=feature_pb2.FloatList(
                    value=[0.0])),
                'image/object/bbox/xmax':
                feature_pb2.Feature(float_list=feature_pb2.FloatList(
                    value=[1.0])),
                'image/object/bbox/ymin':
                feature_pb2.Feature(float_list=feature_pb2.FloatList(
                    value=[0.0])),
                'image/object/bbox/ymax':
                feature_pb2.Feature(float_list=feature_pb2.FloatList(
                    value=[1.0])),
                'image/object/class/label':
                feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                    value=[2])),
                'image/object/mask':
                feature_pb2.Feature(float_list=feature_pb2.FloatList(
                    value=flat_mask)),
            }))

        # The context manager closes the writer even when the write raises,
        # so the file handle is never leaked.
        with tf.python_io.TFRecordWriter(path=path) as writer:
            writer.write(example.SerializeToString())

        return path
Beispiel #3
0
    def create_tf_record(self, has_additional_channels=False):
        """Write a one-example TFRecord file and return its path.

        The example contains a random 4x5x3 uint8 image encoded as JPEG, its
        format, height (4) and width (5), one bounding box with min 0.0 /
        max 1.0 on both axes, the class label 2 and a flat mask of 20 ones.

        Args:
            has_additional_channels: when true, the feature
                'image/additional_channels/encoded' is added, holding the
                same JPEG-encoded random 4x5x1 image twice.

        Returns:
            Path of the 'tfrecord' file written under ``self.get_temp_dir()``.
        """
        path = os.path.join(self.get_temp_dir(), 'tfrecord')

        image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
        additional_channels_tensor = np.random.randint(
            255, size=(4, 5, 1)).astype(np.uint8)
        flat_mask = (4 * 5) * [1.0]
        with self.test_session():
            encoded_jpeg = tf.image.encode_jpeg(
                tf.constant(image_tensor)).eval()
            encoded_additional_channels_jpeg = tf.image.encode_jpeg(
                tf.constant(additional_channels_tensor)).eval()
        features = {
            'image/encoded':
            feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                value=[encoded_jpeg])),
            'image/format':
            feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                value=['jpeg'.encode('utf-8')])),
            'image/height':
            feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[4])),
            'image/width':
            feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[5])),
            'image/object/bbox/xmin':
            feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
            'image/object/bbox/xmax':
            feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
            'image/object/bbox/ymin':
            feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
            'image/object/bbox/ymax':
            feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
            'image/object/class/label':
            feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[2])),
            'image/object/mask':
            feature_pb2.Feature(float_list=feature_pb2.FloatList(
                value=flat_mask)),
        }
        if has_additional_channels:
            features[
                'image/additional_channels/encoded'] = feature_pb2.Feature(
                    bytes_list=feature_pb2.BytesList(
                        value=[encoded_additional_channels_jpeg] * 2))
        example = example_pb2.Example(features=feature_pb2.Features(
            feature=features))

        # Open the writer only once the payload is ready and let the context
        # manager close it, so a failure while encoding or writing cannot
        # leak the underlying file handle.
        with tf.python_io.TFRecordWriter(path) as writer:
            writer.write(example.SerializeToString())

        return path
Beispiel #4
0
def to_tf_example(ingradients):
    """Wrap strings in an Example proto under the 'ingredients' feature.

    Args:
        ingradients: iterable of text strings; each item is converted to
            UTF-8 bytes. (The parameter name is spelled this way in the
            original signature and is kept for callers.)

    Returns:
        An ``example_pb2.Example`` with one bytes feature, 'ingredients'.
    """
    encoded_items = [bytes(item, 'utf-8') for item in ingradients]
    ingredients_feature = feature_pb2.Feature(
        bytes_list=feature_pb2.BytesList(value=encoded_items))
    feature_map = feature_pb2.Features(
        feature={'ingredients': ingredients_feature})
    return example_pb2.Example(features=feature_map)
Beispiel #5
0
 def _record(self, f, r, l):
     """Return a serialized Example with file, record, keywords and label.

     Args:
         f: value stored in the int64 feature "file".
         r: value stored in the int64 feature "record".
         l: label; converted with ``compat.as_bytes`` and stored in "label".

     Returns:
         The serialized Example. "keywords" holds whatever
         ``self._get_keywords(f, r)`` returns.
     """
     file_feature = feature_pb2.Feature(
         int64_list=feature_pb2.Int64List(value=[f]))
     record_feature = feature_pb2.Feature(
         int64_list=feature_pb2.Int64List(value=[r]))
     keywords_feature = feature_pb2.Feature(
         bytes_list=feature_pb2.BytesList(value=self._get_keywords(f, r)))
     label_feature = feature_pb2.Feature(
         bytes_list=feature_pb2.BytesList(value=[compat.as_bytes(l)]))

     feature_map = {
         "file": file_feature,
         "record": record_feature,
         "keywords": keywords_feature,
         "label": label_feature,
     }
     proto = example_pb2.Example(
         features=feature_pb2.Features(feature=feature_map))
     return proto.SerializeToString()
  def testParseExampleInputFn(self):
    """Tests complete flow with input_fn constructed from parse_example.

    Writes `batch_size` random Examples (a 'tokens' bytes feature sampled
    from a fixed word list and an int64 'label' in [0, n_classes)) to a
    temporary TFRecord file, then builds train/eval/predict input functions
    on top of that file and hands them to ``self._test_complete_flow``.
    """
    import os  # Local import: only needed to close the mkstemp descriptor.

    n_classes = 3
    batch_size = 10
    words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

    # mkstemp returns an already-open OS-level descriptor. The writer reopens
    # the file by path, so close the descriptor instead of discarding (and
    # leaking) it.
    fd, examples_file = tempfile.mkstemp()
    os.close(fd)

    # The context manager closes the writer even if building or writing an
    # example raises.
    with python_io.TFRecordWriter(examples_file) as writer:
      for _ in range(batch_size):
        sequence_length = random.randint(1, len(words))
        sentence = random.sample(words, sequence_length)
        label = random.randint(0, n_classes - 1)
        example = example_pb2.Example(features=feature_pb2.Features(
            feature={
                'tokens':
                    feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                        value=sentence)),
                'label':
                    feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                        value=[label])),
            }))
        writer.write(example.SerializeToString())

    col = seq_fc.sequence_categorical_column_with_hash_bucket(
        'tokens', hash_bucket_size=10)
    embed = fc.embedding_column(col, dimension=2)
    feature_columns = [embed]
    feature_spec = parsing_utils.classifier_parse_example_spec(
        feature_columns,
        label_key='label',
        label_dtype=dtypes.int64)

    def _train_input_fn():
      dataset = readers.make_batched_features_dataset(
          examples_file, batch_size, feature_spec)
      # pop() removes the label from the same dict that is returned as the
      # features element of the pair.
      return dataset.map(lambda features: (features, features.pop('label')))

    def _eval_input_fn():
      dataset = readers.make_batched_features_dataset(
          examples_file, batch_size, feature_spec, num_epochs=1)
      return dataset.map(lambda features: (features, features.pop('label')))

    def _predict_input_fn():
      dataset = readers.make_batched_features_dataset(
          examples_file, batch_size, feature_spec, num_epochs=1)

      def features_fn(features):
        features.pop('label')
        return features
      return dataset.map(features_fn)

    self._test_complete_flow(
        feature_columns=feature_columns,
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        n_classes=n_classes,
        batch_size=batch_size)
  def create_tf_record(self):
    """Write a one-example TFRecord file and return its path.

    The example contains a random 4x5x3 uint8 image (values in [0, 255))
    encoded as JPEG, the format string 'jpeg' and the transcript 'hello'.

    Returns:
      Path of the 'tfrecord' file written under ``self.get_temp_dir()``.
    """
    path = os.path.join(self.get_temp_dir(), 'tfrecord')

    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    with self.test_session():
      encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
    example = example_pb2.Example(features=feature_pb2.Features(feature={
        'image/encoded': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
        'image/format': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
        'image/transcript': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=[
                'hello'.encode('utf-8')]))
    }))

    # Open the writer only once the payload is ready and let the context
    # manager close it, so a failure while encoding or writing cannot leak
    # the underlying file handle.
    with tf.python_io.TFRecordWriter(path) as writer:
      writer.write(example.SerializeToString())

    return path
    def _create_feature(feature):
        feature_list = feature if isinstance(feature, list) else [feature]

        # Each feature can be exactly one kind:
        # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L76

        feature_type = type(feature_list[0])
        if feature_type == int:
            return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=feature_list))
        elif feature_type == str:
            return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=feature_list))
        elif feature_type == unicode:
            return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=map(lambda x: str(x), feature_list)))
        elif feature_type == float:
            return feature_pb2.Feature(float_list=feature_pb2.FloatList(value=feature_list))
        else:
            message = """Unsupported request data format: {}, {}.
                            Valid formats: float, int, str any object that implements __iter__
                                           or classification_pb2.ClassificationRequest"""
            raise ValueError(message.format(feature, type(feature)))
def test_example_proto():
    """Build an Example with one bytes feature, print it and serialize it."""
    # BytesList only accepts bytes values; a text literal raises TypeError
    # on Python 3, so the payload is a bytes literal.
    image = b'aa'
    example = example_pb2.Example(features=feature_pb2.Features(
        feature={
            'image/encoded':
            feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                value=[image]))
        }))
    print('aaa')
    print(example)
    # The result is discarded; the call only checks serialization succeeds.
    example.SerializeToString()
Beispiel #10
0
    def testParseExampleInputFn(self):
        """Run the complete flow with input_fns built on parse_example.

        Serializes `batch_size` random Examples in memory (a 'tokens' bytes
        feature sampled from a fixed vocabulary and an int64 'label' in
        [0, n_classes)), then passes train/eval/predict input functions that
        parse them to ``self._test_complete_flow``.
        """
        n_classes = 3
        batch_size = 10
        vocabulary = [
            b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept'
        ]

        def _random_serialized_example():
            # Random draws happen in the order: length, tokens, label.
            num_tokens = random.randint(1, len(vocabulary))
            tokens = random.sample(vocabulary, num_tokens)
            label = random.randint(0, n_classes - 1)
            tokens_feature = feature_pb2.Feature(
                bytes_list=feature_pb2.BytesList(value=tokens))
            label_feature = feature_pb2.Feature(
                int64_list=feature_pb2.Int64List(value=[label]))
            proto = example_pb2.Example(features=feature_pb2.Features(
                feature={
                    'tokens': tokens_feature,
                    'label': label_feature,
                }))
            return proto.SerializeToString()

        serialized_examples = [
            _random_serialized_example() for _ in range(batch_size)
        ]

        feature_spec = {
            'tokens': parsing_ops.VarLenFeature(dtypes.string),
            'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
        }

        def _parse(serialized):
            return parsing_ops.parse_example(serialized, feature_spec)

        def _single_epoch():
            return input_lib.limit_epochs(serialized_examples, num_epochs=1)

        def _train_input_fn():
            parsed = _parse(serialized_examples)
            labels = parsed.pop('label')
            return parsed, labels

        def _eval_input_fn():
            parsed = _parse(_single_epoch())
            labels = parsed.pop('label')
            return parsed, labels

        def _predict_input_fn():
            parsed = _parse(_single_epoch())
            # Prediction gets no labels; drop them from the parsed features.
            parsed.pop('label')
            return parsed, None

        self._test_complete_flow(train_input_fn=_train_input_fn,
                                 eval_input_fn=_eval_input_fn,
                                 predict_input_fn=_predict_input_fn,
                                 n_classes=n_classes,
                                 batch_size=batch_size)
    def testDecodeExampleWithRepeatedImages(self):
        """Decode an Example whose 'image/encoded' feature holds two images.

        The same encoded image is stored twice; the decoded tensor is
        expected to have shape (2, 2, 3, 3) with both entries equal to the
        source image.
        """
        image_format = 'png'
        image_shape = (2, 3, 3)
        image, _ = self.GenerateImage(image_format=image_format,
                                      image_shape=image_shape)
        encoded_op = self._Encoder(image, image_format)
        with self.test_session():
            encoded_bytes = encoded_op.eval()

        repeated_images = feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(
                value=[encoded_bytes, encoded_bytes]))
        proto = example_pb2.Example(features=feature_pb2.Features(
            feature={
                'image/encoded': repeated_images,
                'image/format': self._StringFeature(image_format),
            }))
        serialized_example = proto.SerializeToString()

        with self.test_session():
            # Reshape to a scalar string tensor before decoding.
            serialized_example = array_ops.reshape(serialized_example,
                                                   shape=[])

            keys_to_features = {
                'image/encoded':
                parsing_ops.FixedLenFeature((2, ), dtypes.string),
                'image/format':
                parsing_ops.FixedLenFeature((),
                                            dtypes.string,
                                            default_value=image_format),
            }
            items_to_handlers = {
                'image': tfexample_decoder.Image(repeated=True)
            }
            decoder = tfexample_decoder.TFExampleDecoder(
                keys_to_features=keys_to_features,
                items_to_handlers=items_to_handlers)
            [image_op] = decoder.decode(serialized_example, ['image'])

            decoded = image_op.eval()

            self.assertEqual(decoded.shape, (2, 2, 3, 3))
            self.assertAllEqual(np.squeeze(decoded[0, :, :, :]), image)
            self.assertAllEqual(np.squeeze(decoded[1, :, :, :]), image)
Beispiel #12
0
def _string_feature(value):
    """Return a bytes Feature holding `value` encoded as UTF-8."""
    encoded_values = feature_pb2.BytesList(value=[value.encode('utf-8')])
    return feature_pb2.Feature(bytes_list=encoded_values)
Beispiel #13
0
 def string_to_bytes(value):
     """Wrap `value` in a one-element BytesList proto.

     No encoding is applied; `value` is passed to the proto as given.
     """
     single_item = [value]
     return feature_pb2.BytesList(value=single_item)
Beispiel #14
0
 def _BytesFeatureFromList(self, ndarray):
     """Return a bytes Feature built from the flattened `ndarray`.

     Every element of the flattened array is encoded as UTF-8 before being
     stored in the BytesList.
     """
     encoded = [
         item.encode('utf-8') for item in ndarray.flatten().tolist()
     ]
     bytes_list = feature_pb2.BytesList(value=encoded)
     return feature_pb2.Feature(bytes_list=bytes_list)
 def BytesList(value):
     """Return a ``feature_pb2.BytesList`` containing only `value`."""
     wrapped = [value]
     return feature_pb2.BytesList(value=wrapped)
Beispiel #16
0
from tensorflow.python.data.kernel_tests import test_base
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.util import nest
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import parsing_ops
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging

# Shorthand aliases and builders for Example protos.
example = example_pb2.Example
feature = feature_pb2.Feature


def features(d):
  """Return a Features proto whose `feature` field is set from `d`."""
  return feature_pb2.Features(feature=d)


def bytes_feature(v):
  """Return a Feature whose bytes_list holds the values `v`."""
  return feature(bytes_list=feature_pb2.BytesList(value=v))


def int64_feature(v):
  """Return a Feature whose int64_list holds the values `v`."""
  return feature(int64_list=feature_pb2.Int64List(value=v))


def float_feature(v):
  """Return a Feature whose float_list holds the values `v`."""
  return feature(float_list=feature_pb2.FloatList(value=v))


# Shorthand aliases and builders for SequenceExample protos.
def feature_list(l):
  """Return a FeatureList proto whose `feature` field is set from `l`."""
  return feature_pb2.FeatureList(feature=l)


def feature_lists(d):
  """Return a FeatureLists proto whose `feature_list` field is set from `d`."""
  return feature_pb2.FeatureLists(feature_list=d)


sequence_example = example_pb2.SequenceExample


def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
                                flat_output):
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  i = 0  # Index into the flattened output of session.run()
  for k, v in sorted(dict_tensors.items()):
    # TODO(shivaniagrawal): flat_output is same as v.
 def _bytes_feature(*values):
     """Return a bytes Feature with each positional value UTF-8 encoded."""
     encoded = [item.encode("utf-8") for item in values]
     bytes_list = feature_pb2.BytesList(value=encoded)
     return feature_pb2.Feature(bytes_list=bytes_list)
Beispiel #18
0
def _bytes_feature(list_of_strings):
    """Return a Feature whose bytes_list holds `list_of_strings` as given."""
    bytes_list = feature_pb2.BytesList(value=list_of_strings)
    return feature_pb2.Feature(bytes_list=bytes_list)
 def _BytesFeatureFromList(self, ndarray):
   """Return a bytes Feature holding the flattened elements of `ndarray`.

   The elements are stored as produced by ``flatten().tolist()``; no
   encoding is applied.
   """
   flat_values = ndarray.flatten().tolist()
   bytes_list = feature_pb2.BytesList(value=flat_values)
   return feature_pb2.Feature(bytes_list=bytes_list)
Beispiel #20
0
    def testDecodeJpegImageAndBoundingBox(self):
        """Check that the decoder recovers the image and the bounding box.

        A random 256x256x3 image tensor is encoded and immediately decoded to
        obtain the ground-truth image. The encoded image is also placed in a
        SequenceExample and run through the decoder; the decoded image is
        expected to equal the ground truth. The bounding-box feature lists
        are checked the same way.
        """
        image_tensor = np.random.randint(256,
                                         size=(256, 256, 3)).astype(np.uint8)
        encoded_jpeg = self._EncodeImage(image_tensor)
        decoded_jpeg = self._DecodeImage(encoded_jpeg)

        def _single_float_list(value):
            # FeatureList with one Feature holding one float.
            return feature_pb2.FeatureList(feature=[
                feature_pb2.Feature(float_list=feature_pb2.FloatList(
                    value=[value])),
            ])

        image_list = feature_pb2.FeatureList(feature=[
            feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                value=[encoded_jpeg])),
        ])
        all_lists = feature_pb2.FeatureLists(
            feature_list={
                'image/encoded': image_list,
                'bbox/xmin': _single_float_list(0.0),
                'bbox/xmax': _single_float_list(1.0),
                'bbox/ymin': _single_float_list(0.0),
                'bbox/ymax': _single_float_list(1.0),
            })
        serialized = example_pb2.SequenceExample(
            feature_lists=all_lists).SerializeToString()

        decoder = tf_sequence_example_decoder.TFSequenceExampleDecoder()
        tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

        image_key = fields.InputDataFields.image
        boxes_key = fields.InputDataFields.groundtruth_boxes

        # The static image shape is checked before anything is evaluated.
        self.assertAllEqual(tensor_dict[image_key].get_shape().as_list(),
                            [None, None, None, 3])
        with self.test_session() as sess:
            tensor_dict[image_key] = tf.squeeze(tensor_dict[image_key])
            tensor_dict[boxes_key] = tf.squeeze(tensor_dict[boxes_key])
            tensor_dict = sess.run(tensor_dict)

        # Decoded image must equal the ground-truth decode.
        self.assertAllEqual(decoded_jpeg, tensor_dict[image_key])
        # Decoded bounding box.
        self.assertAllEqual([0.0, 0.0, 1.0, 1.0], tensor_dict[boxes_key])