コード例 #1
0
 def setUp(self):
     """Prepare serialized Example protos and the matching parse spec.

     Stores on the test case:
       * `self.serialized`: a `core.LabeledTensor` over the two serialized
         protos, labeled with the single axis 'batch'.
       * `self.features`: the feature spec for keys 'a' and 'b'.
     """
     super(ParseBase, self).setUp()

     def _int64(values):
         # Wrap a list of ints in an int64_list Feature proto.
         return feature_pb2.Feature(
             int64_list=feature_pb2.Int64List(value=values))

     # One (a, b) value pair per Example.
     value_pairs = (([1], [2, 3, 4]), ([5], [6, 7, 8]))
     protos = [
         example_pb2.Example(features=feature_pb2.Features(
             feature={'a': _int64(a_values), 'b': _int64(b_values)}))
         for a_values, b_values in value_pairs
     ]
     serialized_protos = [proto.SerializeToString() for proto in protos]

     self.serialized = core.LabeledTensor(
         constant_op.constant(serialized_protos), ['batch'])
     self.features = {
         'a': io_ops.FixedLenFeature([], dtypes.int64),
         'b': io_ops.FixedLenFeature([('x', 3)], dtypes.int64)
     }
コード例 #2
0
  def create_tf_record(self):
    """Write a one-example TFRecord file and return its path.

    The example holds a JPEG-encoded random 4x5x3 uint8 image, its format,
    height (4) and width (5), one bounding box whose x and y ranges are
    [0.0, 1.0], the class label 2, and a flat mask of 20 ones.

    Returns:
      Path of the file written inside `self.get_temp_dir()`.
    """
    path = os.path.join(self.get_temp_dir(), 'tfrecord')

    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    flat_mask = (4 * 5) * [1.0]
    with self.test_session():
      encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
    example = example_pb2.Example(features=feature_pb2.Features(feature={
        'image/encoded': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
        'image/format': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
        'image/height': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[4])),
        'image/width': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[5])),
        'image/object/bbox/xmin': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[0.0])),
        'image/object/bbox/xmax': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[1.0])),
        'image/object/bbox/ymin': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[0.0])),
        'image/object/bbox/ymax': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[1.0])),
        'image/object/class/label': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[2])),
        'image/object/mask': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=flat_mask)),
    }))

    # Open the writer only once the payload is ready, and let the context
    # manager close it so the file handle is released even if write() raises.
    with tf.python_io.TFRecordWriter(path) as writer:
      writer.write(example.SerializeToString())

    return path
コード例 #3
0
 def _record(self, f, r):
     """Return a serialized Example for record `r` of file `f`.

     The proto carries `f` under "file", `r` under "record" (both as
     one-element int64 lists) and the result of `self._get_keywords(f, r)`
     as the bytes list "keywords".
     """
     file_feature = feature_pb2.Feature(
         int64_list=feature_pb2.Int64List(value=[f]))
     record_feature = feature_pb2.Feature(
         int64_list=feature_pb2.Int64List(value=[r]))
     keywords_feature = feature_pb2.Feature(
         bytes_list=feature_pb2.BytesList(value=self._get_keywords(f, r)))

     proto = example_pb2.Example(features=feature_pb2.Features(feature={
         "file": file_feature,
         "record": record_feature,
         "keywords": keywords_feature,
     }))
     return proto.SerializeToString()
コード例 #4
0
  def testMakeBatchedFeaturesDataset(self):
    """Rebatches a batched-features dataset of 1024 records across 4 replicas.

    The source dataset uses batch_size=32; the test expects the rebatched
    dataset to yield consecutive groups of 8 values under key "value".
    """
    # Set up
    record_path = os.path.join(self.get_temp_dir(), "tf_record.txt")
    writer = python_io.TFRecordWriter(record_path)
    for index in range(1024):
      int64_value = feature_pb2.Int64List(value=[index])
      proto = example_pb2.Example(
          features=feature_pb2.Features(
              feature={"value": feature_pb2.Feature(int64_list=int64_value)}))
      writer.write(proto.SerializeToString())
    writer.close()

    parse_spec = {"value": parsing_ops.FixedLenFeature([], dtypes.int64)}
    dataset = readers.make_batched_features_dataset(
        file_pattern=record_path,
        batch_size=32,
        features=parse_spec,
        shuffle=False,
        num_epochs=1,
        drop_final_batch=False)

    rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4)

    actual_shapes = [ts.as_list() for ts in _flat_shapes(rebatched_dataset)]
    self.assertEqual([[None]], actual_shapes)

    # One expected element per group of 8 consecutive record values.
    expected_output = []
    for start in range(0, 1024, 8):
      expected_output.append({"value": list(range(start, start + 8))})
    self.assertDatasetProduces(rebatched_dataset, expected_output)
コード例 #5
0
    def test_input_fn_from_parse_example(self, fc_impl):
        """Tests complete flow with input_fn constructed from parse_example.

        Serializes `batch_size` Example protos, each with a float feature 'x'
        and an int64 feature 'y', and passes train/eval/predict input
        functions that parse them to `self._test_complete_flow`.

        Args:
            fc_impl: Forwarded unchanged to `self._test_complete_flow`.
        """
        input_dimension = 2
        n_classes = 3
        batch_size = 10
        # Evenly spaced float32 values from 0 to n_classes - 1, arranged as
        # `batch_size` rows of `input_dimension` columns.
        data = np.linspace(0.,
                           n_classes - 1.,
                           batch_size * input_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, input_dimension)

        # One serialized Example per row: 'x' is the whole row, 'y' is
        # whatever `self._as_label` (defined elsewhere) returns for the
        # row's first element.
        serialized_examples = []
        for datum in data:
            example = example_pb2.Example(features=feature_pb2.Features(
                feature={
                    'x':
                    feature_pb2.Feature(float_list=feature_pb2.FloatList(
                        value=datum)),
                    'y':
                    feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                        value=self._as_label(datum[:1]))),
                }))
            serialized_examples.append(example.SerializeToString())

        # Parse spec matching the features written above.
        feature_spec = {
            'x': parsing_ops.FixedLenFeature([input_dimension],
                                             dtypes.float32),
            'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
        }

        # NOTE(review): `queue_parsed_features` is defined in
        # linear_testing_utils and not visible here; each input function
        # relies on it returning a dict that still contains the 'y' key.

        def _train_input_fn():
            # Parses the serialized examples directly (no epoch limit) and
            # splits 'y' off as the labels.
            feature_map = parsing_ops.parse_example(serialized_examples,
                                                    feature_spec)
            features = linear_testing_utils.queue_parsed_features(feature_map)
            labels = features.pop('y')
            return features, labels

        def _eval_input_fn():
            # Same as training, but the serialized examples are first passed
            # through `input_lib.limit_epochs` with num_epochs=1.
            feature_map = parsing_ops.parse_example(
                input_lib.limit_epochs(serialized_examples, num_epochs=1),
                feature_spec)
            features = linear_testing_utils.queue_parsed_features(feature_map)
            labels = features.pop('y')
            return features, labels

        def _predict_input_fn():
            # Like evaluation, except 'y' is discarded and None is returned
            # in place of the labels.
            feature_map = parsing_ops.parse_example(
                input_lib.limit_epochs(serialized_examples, num_epochs=1),
                feature_spec)
            features = linear_testing_utils.queue_parsed_features(feature_map)
            features.pop('y')
            return features, None

        self._test_complete_flow(train_input_fn=_train_input_fn,
                                 eval_input_fn=_eval_input_fn,
                                 predict_input_fn=_predict_input_fn,
                                 input_dimension=input_dimension,
                                 n_classes=n_classes,
                                 batch_size=batch_size,
                                 fc_impl=fc_impl)
コード例 #6
0
    def _write_test_data():
        """Write four sparse int64 Example protos via `DataUtil.write_test_data`.

        Each row of `batches` becomes one Example; position `i` in the row is
        stored under key "f<i>" and `None` entries are left out entirely.

        Returns:
            Whatever `DataUtil.write_test_data` returns for the protos and
            the schema derived from the three VarLenFeature columns.
        """
        feature_spec = {
            name: tf.VarLenFeature(dtype=tf.int64)
            for name in ("f0", "f1", "f2")
        }
        schema = FeatureSpecToSchema.apply(feature_spec)
        batches = [
            [1, 4, None],
            [2, None, None],
            [3, 5, None],
            [None, None, None],
        ]

        example_proto = []
        for batch in batches:
            feature_map = {}
            for position, value in enumerate(batch):
                if value is None:
                    # Missing values are simply absent from the proto.
                    continue
                feature_map["f" + str(position)] = feature_pb2.Feature(
                    int64_list=feature_pb2.Int64List(value=[value]))
            example_proto.append(example_pb2.Example(
                features=feature_pb2.Features(feature=feature_map)))

        return DataUtil.write_test_data(example_proto, schema)
コード例 #7
0
 def make_record(file_index):
     """Return a serialized Example storing `file_index` under key "file"."""
     index_list = feature_pb2.Int64List(value=[file_index])
     feature_map = {"file": feature_pb2.Feature(int64_list=index_list)}
     proto = example_pb2.Example(
         features=feature_pb2.Features(feature=feature_map))
     return proto.SerializeToString()
コード例 #8
0
 def get_example_proto(cls):
     """Build one Example proto per dict in `cls.values`.

     Every key/value pair of a dict becomes a one-element int64 feature.

     Returns:
         A list of `example_pb2.Example`, in the order of `cls.values`.
     """
     protos = []
     for row in cls.values:
         feature_map = {}
         for key, value in row.items():
             feature_map[key] = feature_pb2.Feature(
                 int64_list=feature_pb2.Int64List(value=[value]))
         protos.append(example_pb2.Example(
             features=feature_pb2.Features(feature=feature_map)))
     return protos
コード例 #9
0
 def get_example_proto(values=None):
     """Build one Example proto per dict in `values`.

     Every key/value pair of a dict becomes a one-element int64 feature.

     Args:
         values: Iterable of dicts mapping feature names to ints. When
             omitted (or None), a single row `{"f1": 1, "f2": 2}` is used.

     Returns:
         A list of `example_pb2.Example`, in the order of `values`.
     """
     # A None sentinel avoids sharing one mutable default list across calls.
     if values is None:
         values = [{"f1": 1, "f2": 2}]
     return [
         example_pb2.Example(features=feature_pb2.Features(
             feature={
                 k: feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                     value=[v]))
                 for k, v in d.items()
             })) for d in values
     ]
コード例 #10
0
  def testParseExampleInputFn(self):
    """Tests complete flow with input_fn constructed from parse_example.

    Writes `batch_size` random Example protos (a 'tokens' bytes feature and a
    'label' int64 feature) to a temporary TFRecord file, then runs
    `self._test_complete_flow` with train/eval/predict input functions that
    read the file through `readers.make_batched_features_dataset`.
    """
    n_classes = 3
    batch_size = 10
    words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

    # Only the path is needed here. The context manager closes the handle so
    # no file descriptor is leaked, while delete=False keeps the file on disk
    # for the writer and the readers below.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
      examples_file = temp_file.name
    # The context manager closes the writer even if building or writing an
    # example raises.
    with python_io.TFRecordWriter(examples_file) as writer:
      for _ in range(batch_size):
        sequence_length = random.randint(1, len(words))
        sentence = random.sample(words, sequence_length)
        label = random.randint(0, n_classes - 1)
        example = example_pb2.Example(features=feature_pb2.Features(
            feature={
                'tokens':
                    feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                        value=sentence)),
                'label':
                    feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                        value=[label])),
            }))
        writer.write(example.SerializeToString())

    col = seq_fc.sequence_categorical_column_with_hash_bucket(
        'tokens', hash_bucket_size=10)
    embed = fc.embedding_column(col, dimension=2)
    feature_columns = [embed]
    feature_spec = parsing_utils.classifier_parse_example_spec(
        feature_columns,
        label_key='label',
        label_dtype=dtypes.int64)

    def _train_input_fn():
      # No num_epochs argument is passed for training.
      dataset = readers.make_batched_features_dataset(
          examples_file, batch_size, feature_spec)
      # pop() removes 'label' from the same dict returned as the features.
      return dataset.map(lambda features: (features, features.pop('label')))
    def _eval_input_fn():
      dataset = readers.make_batched_features_dataset(
          examples_file, batch_size, feature_spec, num_epochs=1)
      return dataset.map(lambda features: (features, features.pop('label')))
    def _predict_input_fn():
      dataset = readers.make_batched_features_dataset(
          examples_file, batch_size, feature_spec, num_epochs=1)
      def features_fn(features):
        # Prediction gets features only; the label is dropped.
        features.pop('label')
        return features
      return dataset.map(features_fn)

    self._test_complete_flow(
        feature_columns=feature_columns,
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        n_classes=n_classes,
        batch_size=batch_size)
コード例 #11
0
    def testParseExampleInputFn(self):
        """Tests complete flow with input_fn constructed from parse_example.

        Serializes `batch_size` random Example protos in memory (a 'tokens'
        bytes feature and a 'label' int64 feature) and runs
        `self._test_complete_flow` with input functions that parse them.
        """
        n_classes = 3
        batch_size = 10
        words = [
            b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept'
        ]

        def _random_serialized_example():
            # Random draws happen in the order: length, sentence, label.
            sequence_length = random.randint(1, len(words))
            sentence = random.sample(words, sequence_length)
            label = random.randint(0, n_classes - 1)
            tokens_feature = feature_pb2.Feature(
                bytes_list=feature_pb2.BytesList(value=sentence))
            label_feature = feature_pb2.Feature(
                int64_list=feature_pb2.Int64List(value=[label]))
            proto = example_pb2.Example(features=feature_pb2.Features(
                feature={'tokens': tokens_feature, 'label': label_feature}))
            return proto.SerializeToString()

        serialized_examples = [
            _random_serialized_example() for _ in range(batch_size)
        ]

        feature_spec = {
            'tokens': parsing_ops.VarLenFeature(dtypes.string),
            'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
        }

        def _parse(serialized):
            return parsing_ops.parse_example(serialized, feature_spec)

        def _parse_single_epoch():
            # Eval and predict both limit the input to one epoch.
            return _parse(
                input_lib.limit_epochs(serialized_examples, num_epochs=1))

        def _train_input_fn():
            parsed = _parse(serialized_examples)
            label_tensor = parsed.pop('label')
            return parsed, label_tensor

        def _eval_input_fn():
            parsed = _parse_single_epoch()
            label_tensor = parsed.pop('label')
            return parsed, label_tensor

        def _predict_input_fn():
            parsed = _parse_single_epoch()
            parsed.pop('label')
            return parsed, None

        self._test_complete_flow(train_input_fn=_train_input_fn,
                                 eval_input_fn=_eval_input_fn,
                                 predict_input_fn=_predict_input_fn,
                                 n_classes=n_classes,
                                 batch_size=batch_size)
コード例 #12
0
    def _write_test_data():
        """Write a single two-column int64 Example via `DataUtil.write_test_data`.

        Returns:
            Whatever `DataUtil.write_test_data` returns for the protos and
            the schema derived from the "f1"/"f2" fixed-length spec.
        """
        feature_spec = {
            "f1": tf.FixedLenFeature((), tf.int64),
            "f2": tf.FixedLenFeature((), tf.int64),
        }
        schema = feature_spec_to_schema(feature_spec)
        values = [{"f1": 1, "f2": 2}]

        example_proto = []
        for row in values:
            feature_map = {}
            for key, value in row.items():
                feature_map[key] = feature_pb2.Feature(
                    int64_list=feature_pb2.Int64List(value=[value]))
            example_proto.append(example_pb2.Example(
                features=feature_pb2.Features(feature=feature_map)))

        return DataUtil.write_test_data(example_proto, schema)
コード例 #13
0
    def _create_feature(feature):
        feature_list = feature if isinstance(feature, list) else [feature]

        # Each feature can be exactly one kind:
        # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L76

        feature_type = type(feature_list[0])
        if feature_type == int:
            return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=feature_list))
        elif feature_type == str:
            return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=feature_list))
        elif feature_type == unicode:
            return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=map(lambda x: str(x), feature_list)))
        elif feature_type == float:
            return feature_pb2.Feature(float_list=feature_pb2.FloatList(value=feature_list))
        else:
            message = """Unsupported request data format: {}, {}.
                            Valid formats: float, int, str any object that implements __iter__
                                           or classification_pb2.ClassificationRequest"""
            raise ValueError(message.format(feature, type(feature)))
コード例 #14
0
ファイル: vis_test.py プロジェクト: PhilPalmer/deepvariant-1
def _int_feature(list_of_ints):
    """Wraps a list of int / bool values in an int64_list Feature proto."""
    int64_list = feature_pb2.Int64List(value=list_of_ints)
    return feature_pb2.Feature(int64_list=int64_list)
コード例 #15
0
ファイル: test_utils.py プロジェクト: yonghankim/tf-slim
def _encoded_int64_feature(ndarray):
    """Return an int64_list Feature holding the flattened values of `ndarray`."""
    flat_values = ndarray.flatten().tolist()
    int64_list = feature_pb2.Int64List(value=flat_values)
    return feature_pb2.Feature(int64_list=int64_list)
コード例 #16
0
 def _Int64FeatureFromList(self, ndarray):
     """Return an int64_list Feature holding the flattened values of `ndarray`."""
     flat_values = ndarray.flatten().tolist()
     int64_list = feature_pb2.Int64List(value=flat_values)
     return feature_pb2.Feature(int64_list=int64_list)
コード例 #17
0
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.util import nest
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import parsing_ops
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging

# Helpers for creating Example objects
example = example_pb2.Example
feature = feature_pb2.Feature


def features(d):
  """Returns a Features proto whose `feature` map is `d`."""
  return feature_pb2.Features(feature=d)


def bytes_feature(v):
  """Returns a Feature whose bytes_list holds the values `v`."""
  return feature(bytes_list=feature_pb2.BytesList(value=v))


def int64_feature(v):
  """Returns a Feature whose int64_list holds the values `v`."""
  return feature(int64_list=feature_pb2.Int64List(value=v))


def float_feature(v):
  """Returns a Feature whose float_list holds the values `v`."""
  return feature(float_list=feature_pb2.FloatList(value=v))


# Helpers for creating SequenceExample objects
def feature_list(l):
  """Returns a FeatureList proto whose `feature` entries are `l`."""
  return feature_pb2.FeatureList(feature=l)


def feature_lists(d):
  """Returns a FeatureLists proto whose `feature_list` map is `d`."""
  return feature_pb2.FeatureLists(feature_list=d)


sequence_example = example_pb2.SequenceExample


def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
                                flat_output):
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  i = 0  # Index into the flattened output of session.run()
  for k, v in sorted(dict_tensors.items()):
    # TODO(shivaniagrawal): flat_output is same as v.
    expected_v = expected_tensors[k]
コード例 #18
0
 def _int64_feature(*values):
     """Return an int64_list Feature holding the positional arguments."""
     int64_list = feature_pb2.Int64List(value=values)
     return feature_pb2.Feature(int64_list=int64_list)