def setUp(self):
    """Prepare serialized `Example` protos and the spec used to parse them.

    After this runs:
      * `self.serialized` is a `core.LabeledTensor` built from a string
        constant of two serialized examples and the axis list `['batch']`.
      * `self.features` maps 'a' and 'b' to `FixedLenFeature` specs
        (an empty shape for 'a', `[('x', 3)]` for 'b').
    """
    super(ParseBase, self).setUp()

    def _int64_feature(values):
        # Wrap a list of ints as an int64 `Feature`.
        return feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=values))

    # One (a, b) pair of raw values per example.
    raw_rows = [([1], [2, 3, 4]), ([5], [6, 7, 8])]
    serialized_protos = []
    for a_values, b_values in raw_rows:
        proto = example_pb2.Example(
            features=feature_pb2.Features(feature={
                'a': _int64_feature(a_values),
                'b': _int64_feature(b_values),
            }))
        serialized_protos.append(proto.SerializeToString())

    self.serialized = core.LabeledTensor(
        constant_op.constant(serialized_protos), ['batch'])
    self.features = {
        'a': io_ops.FixedLenFeature([], dtypes.int64),
        'b': io_ops.FixedLenFeature([('x', 3)], dtypes.int64),
    }
def create_tf_record(self):
    """Write a one-example TFRecord file into the temp dir and return its path.

    The example holds a JPEG-encoded random 4x5x3 uint8 image, its format and
    dimensions, a single bounding box covering [0, 1] on both axes, one class
    label (2) and a flat mask of twenty 1.0 values.
    """
    path = os.path.join(self.get_temp_dir(), 'tfrecord')
    writer = tf.python_io.TFRecordWriter(path)

    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    flat_mask = [1.0] * (4 * 5)
    with self.test_session():
        encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()

    def _bytes(values):
        return feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=values))

    def _int64(values):
        return feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=values))

    def _floats(values):
        return feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=values))

    feature_map = {
        'image/encoded': _bytes([encoded_jpeg]),
        'image/format': _bytes(['jpeg'.encode('utf-8')]),
        'image/height': _int64([4]),
        'image/width': _int64([5]),
        'image/object/bbox/xmin': _floats([0.0]),
        'image/object/bbox/xmax': _floats([1.0]),
        'image/object/bbox/ymin': _floats([0.0]),
        'image/object/bbox/ymax': _floats([1.0]),
        'image/object/class/label': _int64([2]),
        'image/object/mask': _floats(flat_mask),
    }
    example = example_pb2.Example(
        features=feature_pb2.Features(feature=feature_map))

    writer.write(example.SerializeToString())
    writer.close()
    return path
def _record(self, f, r):
    """Return a serialized `Example` describing record `r` of file `f`.

    The example carries int64 features "file" and "record" plus a bytes
    feature "keywords" obtained from `self._get_keywords(f, r)`.
    """
    def _int64(number):
        # Single-element int64 feature.
        return feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[number]))

    feature_map = {
        "file": _int64(f),
        "record": _int64(r),
        "keywords": feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(
                value=self._get_keywords(f, r))),
    }
    proto = example_pb2.Example(
        features=feature_pb2.Features(feature=feature_map))
    return proto.SerializeToString()
def testMakeBatchedFeaturesDataset(self):
    """Rebatch a batched-features dataset across 4 replicas and check output.

    1024 examples are written, read back in batches of 32, and rebatched with
    `num_replicas=4`; the expected result is consecutive batches of 8 values.
    """
    # Set up: one TFRecord file with the values 0..1023, one per example.
    record_path = os.path.join(self.get_temp_dir(), "tf_record.txt")
    writer = python_io.TFRecordWriter(record_path)
    for value in range(1024):
        int64_value = feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[value]))
        proto = example_pb2.Example(
            features=feature_pb2.Features(feature={"value": int64_value}))
        writer.write(proto.SerializeToString())
    writer.close()

    parse_spec = {"value": parsing_ops.FixedLenFeature([], dtypes.int64)}
    dataset = readers.make_batched_features_dataset(
        file_pattern=record_path,
        batch_size=32,
        features=parse_spec,
        shuffle=False,
        num_epochs=1,
        drop_final_batch=False)
    rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4)

    observed_shapes = [
        shape.as_list() for shape in _flat_shapes(rebatched_dataset)
    ]
    self.assertEqual([[None]], observed_shapes)

    expected_output = []
    for start in range(0, 1024, 8):
        expected_output.append({"value": list(range(start, start + 8))})
    self.assertDatasetProduces(rebatched_dataset, expected_output)
def test_input_fn_from_parse_example(self, fc_impl):
    """Tests complete flow with input_fn constructed from parse_example.

    Serializes `batch_size` examples with a float feature 'x' and an int64
    label 'y', then runs `self._test_complete_flow` with train, eval and
    predict input functions that all parse those serialized examples.
    """
    input_dimension = 2
    n_classes = 3
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension,
        dtype=np.float32).reshape(batch_size, input_dimension)

    def _serialize(datum):
        # One Example per data row; the label is derived from the first value.
        proto = example_pb2.Example(features=feature_pb2.Features(feature={
            'x': feature_pb2.Feature(
                float_list=feature_pb2.FloatList(value=datum)),
            'y': feature_pb2.Feature(
                int64_list=feature_pb2.Int64List(
                    value=self._as_label(datum[:1]))),
        }))
        return proto.SerializeToString()

    serialized_examples = [_serialize(datum) for datum in data]

    feature_spec = {
        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
        'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
    }

    def _parse_and_queue(serialized):
        # Parse the serialized protos, then hand the result to the queue helper.
        feature_map = parsing_ops.parse_example(serialized, feature_spec)
        return linear_testing_utils.queue_parsed_features(feature_map)

    def _single_epoch():
        return input_lib.limit_epochs(serialized_examples, num_epochs=1)

    def _train_input_fn():
        features = _parse_and_queue(serialized_examples)
        labels = features.pop('y')
        return features, labels

    def _eval_input_fn():
        features = _parse_and_queue(_single_epoch())
        labels = features.pop('y')
        return features, labels

    def _predict_input_fn():
        features = _parse_and_queue(_single_epoch())
        # Prediction takes no labels; drop the label column.
        features.pop('y')
        return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size,
        fc_impl=fc_impl)
def _write_test_data():
    """Write four sparse int64 examples and return `DataUtil`'s result.

    The schema declares variable-length int64 features "f0", "f1" and "f2".
    Each row of `batches` becomes one `Example`; a `None` entry means the
    corresponding feature is left out of that example.
    """
    schema = FeatureSpecToSchema.apply({
        name: tf.VarLenFeature(dtype=tf.int64)
        for name in ("f0", "f1", "f2")
    })
    batches = [
        [1, 4, None],
        [2, None, None],
        [3, 5, None],
        [None, None, None],
    ]

    example_proto = []
    for batch in batches:
        present = {}
        for index, number in enumerate(batch):
            if number is None:
                continue
            present["f" + str(index)] = feature_pb2.Feature(
                int64_list=feature_pb2.Int64List(value=[number]))
        example_proto.append(
            example_pb2.Example(
                features=feature_pb2.Features(feature=present)))

    return DataUtil.write_test_data(example_proto, schema)
def make_record(file_index):
    """Return a serialized `Example` whose int64 feature "file" is `file_index`."""
    file_feature = feature_pb2.Feature(
        int64_list=feature_pb2.Int64List(value=[file_index]))
    proto = example_pb2.Example(
        features=feature_pb2.Features(feature={"file": file_feature}))
    return proto.SerializeToString()
def get_example_proto(cls):
    """Build one `Example` proto per dict in `cls.values`.

    Every key/value pair of a dict becomes a single-element int64 feature
    named after the key. Returns the protos as a list, in `cls.values` order.
    """
    protos = []
    for row in cls.values:
        feature_map = {}
        for name, number in row.items():
            feature_map[name] = feature_pb2.Feature(
                int64_list=feature_pb2.Int64List(value=[number]))
        protos.append(
            example_pb2.Example(
                features=feature_pb2.Features(feature=feature_map)))
    return protos
def get_example_proto(values=None):
    """Build one `Example` proto per dict in `values`.

    Args:
        values: iterable of dicts mapping feature name to an int value. When
            omitted (or `None`), a single row `{"f1": 1, "f2": 2}` is used.

    Returns:
        A list of `example_pb2.Example`, one per dict, where each key/value
        pair is stored as a single-element int64 feature.
    """
    # A fresh default is built on every call instead of sharing one mutable
    # list (and dict) object across all calls via the function's defaults.
    if values is None:
        values = [{"f1": 1, "f2": 2}]
    return [
        example_pb2.Example(features=feature_pb2.Features(
            feature={
                k: feature_pb2.Feature(
                    int64_list=feature_pb2.Int64List(value=[v]))
                for k, v in d.items()
            }))
        for d in values
    ]
def testParseExampleInputFn(self):
    """Tests complete flow with input_fn constructed from parse_example.

    Writes `batch_size` random examples (a bytes feature 'tokens' and an int64
    feature 'label') to a temporary TFRecord file, then runs
    `self._test_complete_flow` with train, eval and predict input functions
    that read that file through `make_batched_features_dataset`.
    """
    import os  # Function-scope import: only needed to close the descriptor.

    n_classes = 3
    batch_size = 10
    words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

    # mkstemp() returns an already-open OS-level file descriptor together with
    # the path. Only the path is used here, so close the descriptor right away
    # instead of leaking it for the lifetime of the test process.
    fd, examples_file = tempfile.mkstemp()
    os.close(fd)

    writer = python_io.TFRecordWriter(examples_file)
    for _ in range(batch_size):
        sequence_length = random.randint(1, len(words))
        sentence = random.sample(words, sequence_length)
        label = random.randint(0, n_classes - 1)
        example = example_pb2.Example(features=feature_pb2.Features(
            feature={
                'tokens': feature_pb2.Feature(
                    bytes_list=feature_pb2.BytesList(value=sentence)),
                'label': feature_pb2.Feature(
                    int64_list=feature_pb2.Int64List(value=[label])),
            }))
        writer.write(example.SerializeToString())
    writer.close()

    col = seq_fc.sequence_categorical_column_with_hash_bucket(
        'tokens', hash_bucket_size=10)
    embed = fc.embedding_column(col, dimension=2)
    feature_columns = [embed]
    feature_spec = parsing_utils.classifier_parse_example_spec(
        feature_columns, label_key='label', label_dtype=dtypes.int64)

    def _train_input_fn():
        dataset = readers.make_batched_features_dataset(
            examples_file, batch_size, feature_spec)
        # The tuple keeps a reference to the same dict that pop() mutates, so
        # the returned features no longer contain 'label'.
        return dataset.map(lambda features: (features, features.pop('label')))

    def _eval_input_fn():
        dataset = readers.make_batched_features_dataset(
            examples_file, batch_size, feature_spec, num_epochs=1)
        return dataset.map(lambda features: (features, features.pop('label')))

    def _predict_input_fn():
        dataset = readers.make_batched_features_dataset(
            examples_file, batch_size, feature_spec, num_epochs=1)

        def features_fn(features):
            # Prediction takes no labels; drop the label column.
            features.pop('label')
            return features

        return dataset.map(features_fn)

    self._test_complete_flow(
        feature_columns=feature_columns,
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        n_classes=n_classes,
        batch_size=batch_size)
def testParseExampleInputFn(self):
    """Tests complete flow with input_fn constructed from parse_example.

    Builds `batch_size` random serialized examples in memory (a bytes feature
    'tokens' and an int64 feature 'label') and runs `self._test_complete_flow`
    with input functions that parse them via `parsing_ops.parse_example`.
    """
    n_classes = 3
    batch_size = 10
    words = [
        b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept'
    ]

    serialized_examples = []
    for _ in range(batch_size):
        # Keep the random draws in this order: length, sentence, label.
        sequence_length = random.randint(1, len(words))
        sentence = random.sample(words, sequence_length)
        label = random.randint(0, n_classes - 1)
        token_feature = feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=sentence))
        label_feature = feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[label]))
        proto = example_pb2.Example(features=feature_pb2.Features(
            feature={'tokens': token_feature, 'label': label_feature}))
        serialized_examples.append(proto.SerializeToString())

    feature_spec = {
        'tokens': parsing_ops.VarLenFeature(dtypes.string),
        'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
    }

    def _parse(serialized):
        return parsing_ops.parse_example(serialized, feature_spec)

    def _single_epoch():
        return input_lib.limit_epochs(serialized_examples, num_epochs=1)

    def _train_input_fn():
        features = _parse(serialized_examples)
        labels = features.pop('label')
        return features, labels

    def _eval_input_fn():
        features = _parse(_single_epoch())
        labels = features.pop('label')
        return features, labels

    def _predict_input_fn():
        features = _parse(_single_epoch())
        # Prediction takes no labels; drop the label column.
        features.pop('label')
        return features, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        n_classes=n_classes,
        batch_size=batch_size)
def _write_test_data():
    """Write one two-feature int64 example and return `DataUtil`'s result.

    The schema is derived from a spec with scalar int64 features "f1" and
    "f2"; the single example stores f1=1 and f2=2.
    """
    feature_spec = {
        "f1": tf.FixedLenFeature((), tf.int64),
        "f2": tf.FixedLenFeature((), tf.int64),
    }
    schema = feature_spec_to_schema(feature_spec)

    values = [{"f1": 1, "f2": 2}]
    example_proto = []
    for row in values:
        feature_map = {}
        for name, number in row.items():
            feature_map[name] = feature_pb2.Feature(
                int64_list=feature_pb2.Int64List(value=[number]))
        example_proto.append(
            example_pb2.Example(
                features=feature_pb2.Features(feature=feature_map)))

    return DataUtil.write_test_data(example_proto, schema)
def _create_feature(feature): feature_list = feature if isinstance(feature, list) else [feature] # Each feature can be exactly one kind: # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L76 feature_type = type(feature_list[0]) if feature_type == int: return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=feature_list)) elif feature_type == str: return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=feature_list)) elif feature_type == unicode: return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=map(lambda x: str(x), feature_list))) elif feature_type == float: return feature_pb2.Feature(float_list=feature_pb2.FloatList(value=feature_list)) else: message = """Unsupported request data format: {}, {}. Valid formats: float, int, str any object that implements __iter__ or classification_pb2.ClassificationRequest""" raise ValueError(message.format(feature, type(feature)))
def _int_feature(list_of_ints):
    """Wrap a list of int / bool values in an int64 `Feature` proto."""
    int64_list = feature_pb2.Int64List(value=list_of_ints)
    return feature_pb2.Feature(int64_list=int64_list)
def _encoded_int64_feature(ndarray):
    """Return an int64 `Feature` holding the flattened contents of `ndarray`.

    `ndarray` must provide `flatten()` whose result provides `tolist()`.
    """
    flat_values = ndarray.flatten().tolist()
    int64_list = feature_pb2.Int64List(value=flat_values)
    return feature_pb2.Feature(int64_list=int64_list)
def _Int64FeatureFromList(self, ndarray):
    """Return an int64 `Feature` holding the flattened contents of `ndarray`.

    `ndarray` must provide `flatten()` whose result provides `tolist()`;
    no instance state is read.
    """
    flat_values = ndarray.flatten().tolist()
    int64_list = feature_pb2.Int64List(value=flat_values)
    return feature_pb2.Feature(int64_list=int64_list)
from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging # Helpers for creating Example objects example = example_pb2.Example feature = feature_pb2.Feature features = lambda d: feature_pb2.Features(feature=d) bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v)) int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v)) float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v)) # Helpers for creating SequenceExample objects feature_list = lambda l: feature_pb2.FeatureList(feature=l) feature_lists = lambda d: feature_pb2.FeatureLists(feature_list=d) sequence_example = example_pb2.SequenceExample def _compare_output_to_expected(tester, dict_tensors, expected_tensors, flat_output): tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys())) i = 0 # Index into the flattened output of session.run() for k, v in sorted(dict_tensors.items()): # TODO(shivaniagrawal): flat_output is same as v. expected_v = expected_tensors[k]
def _int64_feature(*values):
    """Return an int64 `Feature` containing the positional arguments.

    The arguments are forwarded to `Int64List` as the tuple `values`.
    """
    int64_list = feature_pb2.Int64List(value=values)
    return feature_pb2.Feature(int64_list=int64_list)