def testInt32WeightedSparseInt64ColumnDtypes(self):
  ids = fc.sparse_column_with_keys("ids", [42, 1, -1000], dtype=dtypes.int64)
  weighted_ids = fc.weighted_sparse_column(ids, "weights", dtype=dtypes.int32)
  self.assertDictEqual({
      "ids": parsing_ops.VarLenFeature(dtypes.int64),
      "weights": parsing_ops.VarLenFeature(dtypes.int32)
  }, weighted_ids.config)

  with self.assertRaisesRegexp(ValueError,
                               "dtype is not convertible to float"):
    weighted_ids = fc.weighted_sparse_column(
        ids, "weights", dtype=dtypes.string)
def testDecodeExampleWithBranchedBackupHandler(self):
  example1 = example_pb2.Example(
      features=feature_pb2.Features(
          feature={
              'image/object/class/text':
                  self._BytesFeatureFromList(
                      np.array(['cat', 'dog', 'guinea pig'])),
              'image/object/class/label':
                  self._Int64FeatureFromList(np.array([42, 10, 900]))
          }))
  example2 = example_pb2.Example(
      features=feature_pb2.Features(
          feature={
              'image/object/class/text':
                  self._BytesFeatureFromList(
                      np.array(['cat', 'dog', 'guinea pig'])),
          }))
  example3 = example_pb2.Example(
      features=feature_pb2.Features(
          feature={
              'image/object/class/label':
                  self._Int64FeatureFromList(np.array([42, 10, 901]))
          }))
  # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
  table = lookup_ops.index_table_from_tensor(
      constant_op.constant(['dog', 'guinea pig', 'cat']))
  keys_to_features = {
      'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
      'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
  }
  backup_handler = tf_example_decoder.BackupHandler(
      handler=slim_example_decoder.Tensor('image/object/class/label'),
      backup=tf_example_decoder.LookupTensor('image/object/class/text',
                                             table))
  items_to_handlers = {
      'labels': backup_handler,
  }
  decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                  items_to_handlers)
  obtained_class_ids_each_example = []
  with self.test_session() as sess:
    sess.run(lookup_ops.tables_initializer())
    for example in [example1, example2, example3]:
      serialized_example = array_ops.reshape(
          example.SerializeToString(), shape=[])
      obtained_class_ids_each_example.append(
          decoder.decode(serialized_example)[0].eval())

  self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
  self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
  self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
def test_build_parsing_serving_input_receiver_fn(self):
  feature_spec = {
      'int_feature': parsing_ops.VarLenFeature(dtypes.int64),
      'float_feature': parsing_ops.VarLenFeature(dtypes.float32)
  }
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  with ops.Graph().as_default():
    serving_input_receiver = serving_input_receiver_fn()
    self.assertEqual(set(['int_feature', 'float_feature']),
                     set(serving_input_receiver.features.keys()))
    self.assertEqual(set(['examples']),
                     set(serving_input_receiver.receiver_tensors.keys()))

    example = example_pb2.Example()
    text_format.Parse("features: { "
                      "  feature: { "
                      "    key: 'int_feature' "
                      "    value: { "
                      "      int64_list: { "
                      "        value: [ 21, 2, 5 ] "
                      "      } "
                      "    } "
                      "  } "
                      "  feature: { "
                      "    key: 'float_feature' "
                      "    value: { "
                      "      float_list: { "
                      "        value: [ 525.25 ] "
                      "      } "
                      "    } "
                      "  } "
                      "} ", example)

    with self.test_session() as sess:
      sparse_result = sess.run(
          serving_input_receiver.features,
          feed_dict={
              serving_input_receiver.receiver_tensors['examples'].name:
                  [example.SerializeToString()]
          })
      self.assertAllEqual([[0, 0], [0, 1], [0, 2]],
                          sparse_result['int_feature'].indices)
      self.assertAllEqual([21, 2, 5], sparse_result['int_feature'].values)
      self.assertAllEqual([[0, 0]], sparse_result['float_feature'].indices)
      self.assertAllEqual([525.25], sparse_result['float_feature'].values)
def _ReadExamples(filename_queue, shape, using_ctc, reader=None):
  """Builds network input tensor ops for TF Example.

  Args:
    filename_queue: Queue of filenames, from tf.train.string_input_producer.
    shape:          ImageShape with the desired shape of the input.
    using_ctc:      Take the unpadded_class labels instead of padded.
    reader:         Function that returns an actual reader to read Examples
      from input files. If None, uses tf.TFRecordReader().
  Returns:
    image:   Float Tensor containing the input image scaled to [-1.28, 1.27].
    height:  Tensor int64 containing the height of the image.
    width:   Tensor int64 containing the width of the image.
    labels:  Serialized SparseTensor containing the int64 labels.
    text:    Tensor string of the utf8 truth text.
  """
  if reader:
    reader = reader()
  else:
    reader = tf.TFRecordReader()
  _, example_serialized = reader.read(filename_queue)
  example_serialized = tf.reshape(example_serialized, shape=[])
  features = tf.parse_single_example(
      example_serialized, {
          'image/encoded':
              parsing_ops.FixedLenFeature(
                  [1], dtype=tf.string, default_value=''),
          'image/text':
              parsing_ops.FixedLenFeature(
                  [1], dtype=tf.string, default_value=''),
          'image/class':
              parsing_ops.VarLenFeature(dtype=tf.int64),
          'image/unpadded_class':
              parsing_ops.VarLenFeature(dtype=tf.int64),
          'image/height':
              parsing_ops.FixedLenFeature([1], dtype=tf.int64,
                                          default_value=1),
          'image/width':
              parsing_ops.FixedLenFeature([1], dtype=tf.int64,
                                          default_value=1)
      })
  if using_ctc:
    labels = features['image/unpadded_class']
  else:
    labels = features['image/class']
  labels = tf.serialize_sparse(labels)
  image = tf.reshape(features['image/encoded'], shape=[], name='encoded')
  image = _ImageProcessing(image, shape)
  height = tf.reshape(features['image/height'], [-1])
  width = tf.reshape(features['image/width'], [-1])
  text = tf.reshape(features['image/text'], shape=[])
  return image, height, width, labels, text
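# Illustrative only, not part of the original file: the labels returned by
# _ReadExamples were serialized with tf.serialize_sparse, so a downstream
# consumer would typically batch them and then recover the SparseTensor.
# A minimal sketch under that assumption (the helper name and `batch_size`
# argument are hypothetical), using graph-mode TF 1.x APIs:
def _BatchedSparseLabels(labels, batch_size):
  """Sketch: recover a batched SparseTensor from serialized labels."""
  # labels is the rank-1 string tensor produced by tf.serialize_sparse;
  # batching yields the [batch_size, 3] tensor deserialize_many expects.
  batched = tf.train.batch([labels], batch_size=batch_size)
  return tf.deserialize_many_sparse(batched, dtype=tf.int64)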
def test_to_feature_columns_and_input_fn(self):
  df = setup_test_df_3layer()
  feature_columns, input_fn = (
      estimator_utils.to_feature_columns_and_input_fn(
          df,
          base_input_keys_with_defaults={"a": 1, "b": 2, "c": 3, "d": 4},
          label_keys=["g"],
          feature_keys=["a", "b", "f"]))

  expected_feature_column_a = feature_column.DataFrameColumn(
      "a",
      learn.PredefinedSeries(
          "a",
          parsing_ops.FixedLenFeature(tensor_shape.unknown_shape(),
                                      dtypes.int32, 1)))
  expected_feature_column_b = feature_column.DataFrameColumn(
      "b",
      learn.PredefinedSeries("b", parsing_ops.VarLenFeature(dtypes.int32)))
  expected_feature_column_f = feature_column.DataFrameColumn(
      "f",
      learn.TransformedSeries([
          learn.PredefinedSeries(
              "c",
              parsing_ops.FixedLenFeature(tensor_shape.unknown_shape(),
                                          dtypes.int32, 3)),
          learn.PredefinedSeries("d", parsing_ops.VarLenFeature(dtypes.int32))
      ], mocks.Mock2x2Transform("iue", "eui", "snt"), "out2"))

  expected_feature_columns = [
      expected_feature_column_a, expected_feature_column_b,
      expected_feature_column_f
  ]
  self.assertEqual(sorted(expected_feature_columns), sorted(feature_columns))

  base_features, labels = input_fn()
  expected_base_features = {
      "a": mocks.MockTensor("Tensor a", dtypes.int32),
      "b": mocks.MockSparseTensor("SparseTensor b", dtypes.int32),
      "c": mocks.MockTensor("Tensor c", dtypes.int32),
      "d": mocks.MockSparseTensor("SparseTensor d", dtypes.int32)
  }
  self.assertEqual(expected_base_features, base_features)

  expected_labels = mocks.MockTensor("Out iue", dtypes.int32)
  self.assertEqual(expected_labels, labels)

  self.assertEqual(3, len(feature_columns))
def test_export_savedmodel_proto_roundtrip(self):
  tmpdir = tempfile.mkdtemp()
  est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
  est.train(input_fn=dummy_input_fn, steps=1)
  feature_spec = {
      'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
      'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)
  }
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)

  # Perform the export.
  export_dir_base = os.path.join(
      compat.as_bytes(tmpdir), compat.as_bytes('export'))
  export_dir = est.export_savedmodel(export_dir_base,
                                     serving_input_receiver_fn)

  # Check that all the files are in the right places.
  self.assertTrue(gfile.Exists(export_dir_base))
  self.assertTrue(gfile.Exists(export_dir))
  self.assertTrue(
      gfile.Exists(
          os.path.join(
              compat.as_bytes(export_dir),
              compat.as_bytes('saved_model.pb'))))
  self.assertTrue(
      gfile.Exists(
          os.path.join(
              compat.as_bytes(export_dir), compat.as_bytes('variables'))))
  self.assertTrue(
      gfile.Exists(
          os.path.join(
              compat.as_bytes(export_dir),
              compat.as_bytes('variables/variables.index'))))
  self.assertTrue(
      gfile.Exists(
          os.path.join(
              compat.as_bytes(export_dir),
              compat.as_bytes('variables/variables.data-00000-of-00001'))))

  # Restore, to validate that the export was well-formed.
  with ops.Graph().as_default() as graph:
    with session.Session(graph=graph) as sess:
      loader.load(sess, [tag_constants.SERVING], export_dir)
      graph_ops = [x.name for x in graph.get_operations()]
      self.assertTrue('input_example_tensor' in graph_ops)
      self.assertTrue('ParseExample/ParseExample' in graph_ops)
      self.assertTrue('weight' in graph_ops)

  # Clean up.
  gfile.DeleteRecursively(tmpdir)
def testCreateSequenceFeatureSpec(self):
  sparse_col = fc.sparse_column_with_hash_bucket(
      "sparse_column", hash_bucket_size=100)
  embedding_col = fc.embedding_column(
      fc.sparse_column_with_hash_bucket(
          "sparse_column_for_embedding", hash_bucket_size=10),
      dimension=4)
  sparse_id_col = fc.sparse_column_with_keys("id_column",
                                             ["marlo", "omar", "stringer"])
  weighted_id_col = fc.weighted_sparse_column(sparse_id_col,
                                              "id_weights_column")
  real_valued_col1 = fc.real_valued_column("real_valued_column", dimension=2)
  real_valued_col2 = fc.real_valued_column(
      "real_valued_default_column", dimension=5, default_value=3.0)
  real_valued_col3 = fc._real_valued_var_len_column(
      "real_valued_var_len_column", default_value=3.0, is_sparse=True)
  real_valued_col4 = fc._real_valued_var_len_column(
      "real_valued_var_len_dense_column", default_value=4.0, is_sparse=False)

  feature_columns = set([
      sparse_col, embedding_col, weighted_id_col, real_valued_col1,
      real_valued_col2, real_valued_col3, real_valued_col4
  ])

  feature_spec = fc._create_sequence_feature_spec_for_parsing(feature_columns)

  expected_feature_spec = {
      "sparse_column":
          parsing_ops.VarLenFeature(dtypes.string),
      "sparse_column_for_embedding":
          parsing_ops.VarLenFeature(dtypes.string),
      "id_column":
          parsing_ops.VarLenFeature(dtypes.string),
      "id_weights_column":
          parsing_ops.VarLenFeature(dtypes.float32),
      "real_valued_column":
          parsing_ops.FixedLenSequenceFeature(
              shape=[2], dtype=dtypes.float32, allow_missing=False),
      "real_valued_default_column":
          parsing_ops.FixedLenSequenceFeature(
              shape=[5], dtype=dtypes.float32, allow_missing=True),
      "real_valued_var_len_column":
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
      "real_valued_var_len_dense_column":
          parsing_ops.FixedLenSequenceFeature(
              shape=[], dtype=dtypes.float32, allow_missing=True,
              default_value=4.0),
  }

  self.assertDictEqual(expected_feature_spec, feature_spec)
def testSparseColumnDtypes(self):
  sc = fc.sparse_column_with_integerized_feature("sc", 10)
  self.assertDictEqual(
      {"sc": parsing_ops.VarLenFeature(dtype=dtypes.int64)}, sc.config)

  sc = fc.sparse_column_with_integerized_feature("sc", 10, dtype=dtypes.int32)
  self.assertDictEqual(
      {"sc": parsing_ops.VarLenFeature(dtype=dtypes.int32)}, sc.config)

  with self.assertRaisesRegexp(ValueError, "dtype must be an integer"):
    fc.sparse_column_with_integerized_feature("sc", 10, dtype=dtypes.float32)
def test_scaffold_is_used_for_local_init(self):
  tmpdir = tempfile.mkdtemp()

  def _model_fn_scaffold(features, labels, mode):
    _, _ = features, labels
    my_int = variables.Variable(
        1, name='my_int', collections=[ops.GraphKeys.LOCAL_VARIABLES])
    scores = constant_op.constant([3.])
    with ops.control_dependencies([
        variables.local_variables_initializer(),
        data_flow_ops.tables_initializer()
    ]):
      assign_op = state_ops.assign(my_int, 12345)

    # local_init_op must be an Operation, not a Tensor.
    custom_local_init_op = control_flow_ops.group(assign_op)
    return model_fn_lib.EstimatorSpec(
        mode=mode,
        predictions=constant_op.constant([[1.]]),
        loss=constant_op.constant(0.),
        train_op=constant_op.constant(0.),
        scaffold=training.Scaffold(local_init_op=custom_local_init_op),
        export_outputs={'test': export_output.ClassificationOutput(scores)})

  est = estimator.Estimator(model_fn=_model_fn_scaffold)
  est.train(dummy_input_fn, steps=1)
  feature_spec = {
      'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
      'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)
  }
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)

  # Perform the export.
  export_dir_base = os.path.join(
      compat.as_bytes(tmpdir), compat.as_bytes('export'))
  export_dir = est.export_savedmodel(export_dir_base,
                                     serving_input_receiver_fn)

  # Restore, to validate that the custom local_init_op runs.
  with ops.Graph().as_default() as graph:
    with session.Session(graph=graph) as sess:
      loader.load(sess, [tag_constants.SERVING], export_dir)
      my_int = graph.get_tensor_by_name('my_int:0')
      my_int_value = sess.run(my_int)
      self.assertEqual(12345, my_int_value)
def testSerializedContainingSparse(self): original = [ example(features=features({"st_c": float_feature([3, 4])})), example(features=features({ "st_c": float_feature([]), # empty float list })), example(features=features({ "st_d": feature(), # feature with nothing in it })), example(features=features({ "st_c": float_feature([1, 2, -1]), "st_d": bytes_feature([b"hi"]) })) ] expected_outputs = [{ "st_c": (np.array([[0], [1]], dtype=np.int64), np.array([3.0, 4.0], dtype=np.float32), np.array([2], dtype=np.int64)), "st_d": empty_sparse(bytes) }, { "st_c": empty_sparse(np.float32), "st_d": empty_sparse(bytes) }, { "st_c": empty_sparse(np.float32), "st_d": empty_sparse(bytes) }, { "st_c": (np.array([[0], [1], [2]], dtype=np.int64), np.array([1.0, 2.0, -1.0], dtype=np.float32), np.array([3], dtype=np.int64)), "st_d": (np.array([[0]], dtype=np.int64), np.array(["hi"], dtype=bytes), np.array([1], dtype=np.int64)) }] for proto, expected_output in zip(original, expected_outputs): self._test( { "serialized": ops.convert_to_tensor( proto.SerializeToString()), "features": { "st_c": parsing_ops.VarLenFeature(dtypes.float32), "st_d": parsing_ops.VarLenFeature(dtypes.string) }, }, expected_output)
def testEmptySerializedWithAllDefaults(self):
  sparse_name = "st_a"
  a_name = "a"
  b_name = "b"
  c_name = "c:has_a_tricky_name"
  a_default = [0, 42, 0]
  b_default = np.random.rand(3, 3).astype(bytes)
  c_default = np.random.rand(2).astype(np.float32)

  expected_st_a = (  # indices, values, shape
      np.empty((0, 2), dtype=np.int64),  # indices
      np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
      np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

  expected_output = {
      sparse_name: expected_st_a,
      a_name: np.array(2 * [[a_default]]),
      b_name: np.array(2 * [b_default]),
      c_name: np.array(2 * [c_default]),
  }

  self._test(
      ops.convert_to_tensor(["", ""]), {
          sparse_name:
              parsing_ops.VarLenFeature(dtypes.int64),
          a_name:
              parsing_ops.FixedLenFeature(
                  (1, 3), dtypes.int64, default_value=a_default),
          b_name:
              parsing_ops.FixedLenFeature(
                  (3, 3), dtypes.string, default_value=b_default),
          c_name:
              parsing_ops.FixedLenFeature(
                  (2,), dtypes.float32, default_value=c_default),
      },
      expected_values=expected_output)
def testSparseColumnSingleBucket(self):
  sc = fc.sparse_column_with_integerized_feature("sc", 1)
  self.assertDictEqual(
      {"sc": parsing_ops.VarLenFeature(dtype=dtypes.int64)}, sc.config)
  self.assertEqual(1, sc._wide_embedding_lookup_arguments(None).vocab_size)
def testSequenceExampleWithEmptyFeatureInFeatureLists(self):
  original = sequence_example(feature_lists=feature_lists({
      "st_a": feature_list([
          float_feature([3.0, 4.0]),
          feature(),
          float_feature([5.0]),
      ]),
  }))

  serialized = original.SerializeToString()

  expected_st_a = (
      np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64),  # indices
      np.array([3.0, 4.0, 5.0], dtype=np.float32),  # values
      np.array([3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2

  expected_feature_list_output = {
      "st_a": expected_st_a,
  }

  self._test(
      {
          "example_name": "in1",
          "serialized": ops.convert_to_tensor(serialized),
          "sequence_features": {
              "st_a": parsing_ops.VarLenFeature(dtypes.float32),
          }
      },
      expected_feat_list_values=expected_feature_list_output)
def parse_examples(example_protos):
  features = {
      'target':
          parsing_ops.FixedLenFeature(
              shape=[1], dtype=dtypes.float32, default_value=0),
      'age_indices':
          parsing_ops.VarLenFeature(dtype=dtypes.int64),
      'age_values':
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
      'gender_indices':
          parsing_ops.VarLenFeature(dtype=dtypes.int64),
      'gender_values':
          parsing_ops.VarLenFeature(dtype=dtypes.float32)
  }
  return parsing_ops.parse_example(
      [e.SerializeToString() for e in example_protos], features)
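# Illustrative only, not part of the original file: a minimal sketch of how
# an Example proto matching the feature spec in parse_examples above could be
# built. The helper name and its arguments are hypothetical; example_pb2 and
# feature_pb2 are the same proto modules used elsewhere in these tests.
def make_example(target, age_indices, age_values, gender_indices,
                 gender_values):
  """Sketch: build one Example with the features parse_examples expects."""
  return example_pb2.Example(features=feature_pb2.Features(feature={
      'target': feature_pb2.Feature(
          float_list=feature_pb2.FloatList(value=[target])),
      'age_indices': feature_pb2.Feature(
          int64_list=feature_pb2.Int64List(value=age_indices)),
      'age_values': feature_pb2.Feature(
          float_list=feature_pb2.FloatList(value=age_values)),
      'gender_indices': feature_pb2.Feature(
          int64_list=feature_pb2.Int64List(value=gender_indices)),
      'gender_values': feature_pb2.Feature(
          float_list=feature_pb2.FloatList(value=gender_values)),
  }))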
def testDecodeExampleWithBranchedLookup(self):
  example = example_pb2.Example(features=feature_pb2.Features(
      feature={
          'image/object/class/text':
              self._BytesFeatureFromList(
                  np.array(['cat', 'dog', 'guinea pig'])),
      }))
  serialized_example = example.SerializeToString()

  # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
  table = lookup_ops.index_table_from_tensor(
      constant_op.constant(['dog', 'guinea pig', 'cat']))

  with self.test_session() as sess:
    sess.run(lookup_ops.tables_initializer())

    serialized_example = array_ops.reshape(serialized_example, shape=[])

    keys_to_features = {
        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
    }
    items_to_handlers = {
        'labels':
            tf_example_decoder.LookupTensor('image/object/class/text', table),
    }
    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                    items_to_handlers)
    obtained_class_ids = decoder.decode(serialized_example)[0].eval()

  self.assertAllClose([2, 0, 1], obtained_class_ids)
def testEmptySerializedWithoutDefaultsShouldFail(self):
  input_features = {
      "st_a":
          parsing_ops.VarLenFeature(dtypes.int64),
      "a":
          parsing_ops.FixedLenFeature(
              (1, 3), dtypes.int64, default_value=[0, 42, 0]),
      "b":
          parsing_ops.FixedLenFeature(
              (3, 3),
              dtypes.string,
              default_value=np.random.rand(3, 3).astype(bytes)),
      # Feature "c" is missing a default, this gap will cause failure.
      "c":
          parsing_ops.FixedLenFeature((2,), dtype=dtypes.float32),
  }

  # Edge case where the key is there but the feature value is empty.
  original = example(features=features({"c": feature()}))
  self._test(
      [original.SerializeToString()],
      input_features,
      expected_err=(errors_impl.InvalidArgumentError,
                    "Feature: c \\(data type: float\\) is required"))

  # Standard case of missing key and value.
  self._test(
      ["", ""],
      input_features,
      expected_err=(errors_impl.InvalidArgumentError,
                    "Feature: c \\(data type: float\\) is required"))
def make_batch_feature(self,
                       filenames,
                       num_epochs,
                       batch_size,
                       reader_num_threads=1,
                       parser_num_threads=1,
                       shuffle=False,
                       shuffle_seed=None,
                       drop_final_batch=False):
  self.filenames = filenames
  self.num_epochs = num_epochs
  self.batch_size = batch_size

  return readers.make_batched_features_dataset(
      file_pattern=self.filenames,
      batch_size=self.batch_size,
      features={
          "file": parsing_ops.FixedLenFeature([], dtypes.int64),
          "record": parsing_ops.FixedLenFeature([], dtypes.int64),
          "keywords": parsing_ops.VarLenFeature(dtypes.string)
      },
      reader=core_readers.TFRecordDataset,
      num_epochs=self.num_epochs,
      shuffle=shuffle,
      shuffle_seed=shuffle_seed,
      reader_num_threads=reader_num_threads,
      parser_num_threads=parser_num_threads,
      drop_final_batch=drop_final_batch)
def _to_feature_spec(tensor, default_value=None):
  if isinstance(tensor, ops.SparseTensor):
    return parsing_ops.VarLenFeature(dtype=tensor.dtype)
  else:
    return parsing_ops.FixedLenFeature(
        shape=tensor.get_shape(),
        dtype=tensor.dtype,
        default_value=default_value)
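# Illustrative only, not part of the original file: _to_feature_spec can be
# mapped over a dict of tensors to derive a full parsing spec. A sketch, with
# a hypothetical helper name, assuming `tensors` maps feature names to
# Tensor/SparseTensor values:
def _to_feature_specs(tensors, default_values=None):
  """Sketch: build a {name: feature spec} dict for parse_example."""
  default_values = default_values or {}
  return {
      name: _to_feature_spec(t, default_values.get(name))
      for name, t in tensors.items()
  }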
def test_non_v1_feature_column(self):
  parsing_spec = self._parse_example_fn(
      feature_columns=[fc.sequence_numeric_column('a')], label_key='b')
  expected_spec = {
      'a': parsing_ops.VarLenFeature(dtype=dtypes.float32),
      'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
  }
  self.assertDictEqual(expected_spec, parsing_spec)
def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
  original = example(features=features({
      "c": float_feature([3, 4]),
      "val": bytes_feature([b"a", b"b"]),
      "idx": int64_feature([0, 3]),
      "st_a": float_feature([3.0, 4.0])
  }))

  serialized = original.SerializeToString()

  expected_st_a = (
      np.array([[0], [1]], dtype=np.int64),  # indices
      np.array([3.0, 4.0], dtype=np.float32),  # values
      np.array([2], dtype=np.int64))  # shape: max_values = 2

  expected_sp = (  # indices, values, shape
      np.array([[0], [3]], dtype=np.int64),
      np.array(["a", "b"], dtype="|S"),
      np.array([13], dtype=np.int64))  # max_values = 13

  a_default = [1, 2, 3]
  b_default = np.random.rand(3, 3).astype(bytes)
  expected_output = {
      "st_a": expected_st_a,
      "sp": expected_sp,
      "a": [a_default],
      "b": b_default,
      "c": np.array([3, 4], dtype=np.float32),
  }

  self._test(
      {
          "example_names": ops.convert_to_tensor("in1"),
          "serialized": ops.convert_to_tensor(serialized),
          "features": {
              "st_a":
                  parsing_ops.VarLenFeature(dtypes.float32),
              "sp":
                  parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
              "a":
                  parsing_ops.FixedLenFeature(
                      (1, 3), dtypes.int64, default_value=a_default),
              "b":
                  parsing_ops.FixedLenFeature(
                      (3, 3), dtypes.string, default_value=b_default),
              # Feature "c" must be provided, since it has no default_value.
              "c":
                  parsing_ops.FixedLenFeature((2,), dtypes.float32),
          }
      },
      expected_output)
def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
  expected_st_a = (  # indices, values, shape
      np.empty((0, 2), dtype=np.int64),  # indices
      np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
      np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

  expected_sp = (  # indices, values, shape
      np.array([[0, 0], [0, 3], [1, 7]], dtype=np.int64),
      np.array(["a", "b", "c"], dtype="|S"),
      np.array([2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

  original = [
      example(features=features({
          "c": float_feature([3, 4]),
          "val": bytes_feature([b"a", b"b"]),
          "idx": int64_feature([0, 3])
      })),
      example(features=features({
          "c": float_feature([1, 2]),
          "val": bytes_feature([b"c"]),
          "idx": int64_feature([7])
      }))
  ]

  serialized = [m.SerializeToString() for m in original]

  a_default = [1, 2, 3]
  b_default = np.random.rand(3, 3).astype(bytes)
  expected_output = {
      "st_a": expected_st_a,
      "sp": expected_sp,
      "a": np.array(2 * [[a_default]]),
      "b": np.array(2 * [b_default]),
      "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
  }

  self._test(
      ops.convert_to_tensor(serialized), {
          "st_a":
              parsing_ops.VarLenFeature(dtypes.int64),
          "sp":
              parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
          "a":
              parsing_ops.FixedLenFeature(
                  (1, 3), dtypes.int64, default_value=a_default),
          "b":
              parsing_ops.FixedLenFeature(
                  (3, 3), dtypes.string, default_value=b_default),
          # Feature "c" must be provided, since it has no default_value.
          "c":
              parsing_ops.FixedLenFeature((2,), dtypes.float32),
      },
      expected_values=expected_output)
def testDecodeExampleWithBoundingBox(self):
  num_bboxes = 10
  np_ymin = np.random.rand(num_bboxes, 1)
  np_xmin = np.random.rand(num_bboxes, 1)
  np_ymax = np.random.rand(num_bboxes, 1)
  np_xmax = np.random.rand(num_bboxes, 1)
  np_bboxes = np.hstack([np_ymin, np_xmin, np_ymax, np_xmax])

  example = example_pb2.Example(features=feature_pb2.Features(
      feature={
          'image/object/bbox/ymin': self._EncodedFloatFeature(np_ymin),
          'image/object/bbox/xmin': self._EncodedFloatFeature(np_xmin),
          'image/object/bbox/ymax': self._EncodedFloatFeature(np_ymax),
          'image/object/bbox/xmax': self._EncodedFloatFeature(np_xmax),
      }))
  serialized_example = example.SerializeToString()

  with self.test_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])

    keys_to_features = {
        'image/object/bbox/ymin': parsing_ops.VarLenFeature(dtypes.float32),
        'image/object/bbox/xmin': parsing_ops.VarLenFeature(dtypes.float32),
        'image/object/bbox/ymax': parsing_ops.VarLenFeature(dtypes.float32),
        'image/object/bbox/xmax': parsing_ops.VarLenFeature(dtypes.float32),
    }
    items_to_handlers = {
        'object/bbox':
            tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                          'image/object/bbox/'),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_bboxes] = decoder.decode(serialized_example, ['object/bbox'])
    bboxes = tf_bboxes.eval()

  self.assertAllClose(np_bboxes, bboxes)
def get_feature_spec(self):
  dtype = self.dtype
  # Convert, because example parser only supports float32, int64 and string.
  if dtype == dtypes.int32:
    dtype = dtypes.int64
  if dtype == dtypes.float64:
    dtype = dtypes.float32
  if self.is_sparse:
    return parsing_ops.VarLenFeature(dtype=dtype)
  return parsing_ops.FixedLenFeature(shape=self.shape[1:], dtype=dtype)
def testSerializedContainingSparse(self): original = [ example(features=features({ "st_c": float_feature([3, 4]) })), example(features=features({ "st_c": float_feature([]), # empty float list })), example(features=features({ "st_d": feature(), # feature with nothing in it })), example(features=features({ "st_c": float_feature([1, 2, -1]), "st_d": bytes_feature([b"hi"]) })) ] serialized = [m.SerializeToString() for m in original] expected_st_c = ( # indices, values, shape np.array( [[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64), np.array( [3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32), np.array( [4, 3], dtype=np.int64)) # batch == 2, max_elems = 3 expected_st_d = ( # indices, values, shape np.array( [[3, 0]], dtype=np.int64), np.array( ["hi"], dtype=bytes), np.array( [4, 1], dtype=np.int64)) # batch == 2, max_elems = 1 expected_output = { "st_c": expected_st_c, "st_d": expected_st_d, } self._test({ "serialized": ops.convert_to_tensor(serialized), "features": { "st_c": parsing_ops.VarLenFeature(dtypes.float32), "st_d": parsing_ops.VarLenFeature(dtypes.string) } }, expected_output)
def parse_fn(serialized):
  features = {"x": parsing_ops.VarLenFeature(dtypes.int64)}
  parsed = parsing_ops.parse_single_example(serialized, features)
  parsed = parsed["x"].values
  size = array_ops.size(parsed)
  value = math_ops.cast(parsed, dtypes.bool)
  return control_flow_ops.cond(
      size > 0, lambda: array_ops.reshape(value, []),
      lambda: array_ops.zeros([], dtypes.bool))
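# Illustrative only, not part of the original file: parse_fn maps one
# serialized Example to a scalar boolean, so it composes directly with
# Dataset.map. A sketch with a hypothetical helper name, assuming `filenames`
# holds TFRecord paths and core_readers is the tf.data TFRecord reader module
# used elsewhere in these tests:
def _bool_dataset(filenames):
  """Sketch: a dataset of scalar booleans, one per serialized Example."""
  return core_readers.TFRecordDataset(filenames).map(parse_fn)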
def testParseExampleInputFn(self):
  """Tests complete flow with input_fn constructed from parse_example."""
  n_classes = 3
  batch_size = 10
  words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

  serialized_examples = []
  for _ in range(batch_size):
    sequence_length = random.randint(1, len(words))
    sentence = random.sample(words, sequence_length)
    label = random.randint(0, n_classes - 1)
    example = example_pb2.Example(features=feature_pb2.Features(
        feature={
            'tokens':
                feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                    value=sentence)),
            'label':
                feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                    value=[label])),
        }))
    serialized_examples.append(example.SerializeToString())

  feature_spec = {
      'tokens': parsing_ops.VarLenFeature(dtypes.string),
      'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
  }

  def _train_input_fn():
    features = parsing_ops.parse_example(serialized_examples, feature_spec)
    labels = features.pop('label')
    return features, labels

  def _eval_input_fn():
    features = parsing_ops.parse_example(
        input_lib.limit_epochs(serialized_examples, num_epochs=1),
        feature_spec)
    labels = features.pop('label')
    return features, labels

  def _predict_input_fn():
    features = parsing_ops.parse_example(
        input_lib.limit_epochs(serialized_examples, num_epochs=1),
        feature_spec)
    features.pop('label')
    return features, None

  self._test_complete_flow(
      train_input_fn=_train_input_fn,
      eval_input_fn=_eval_input_fn,
      predict_input_fn=_predict_input_fn,
      n_classes=n_classes,
      batch_size=batch_size)
def test_export_savedmodel_extra_assets(self):
  tmpdir = tempfile.mkdtemp()
  est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
  est.train(input_fn=dummy_input_fn, steps=1)
  feature_spec = {
      'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
      'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)
  }
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)

  # Create a fake asset.
  extra_file_name = os.path.join(
      compat.as_bytes(tmpdir), compat.as_bytes('my_extra_file'))
  extra_file = gfile.GFile(extra_file_name, mode='w')
  extra_file.write(_EXTRA_FILE_CONTENT)
  extra_file.close()

  # Perform the export.
  assets_extra = {'some/sub/directory/my_extra_file': extra_file_name}
  export_dir_base = os.path.join(
      compat.as_bytes(tmpdir), compat.as_bytes('export'))
  export_dir = est.export_savedmodel(
      export_dir_base, serving_input_receiver_fn, assets_extra=assets_extra)

  # Check that the asset files are in the right places.
  expected_extra_path = os.path.join(
      compat.as_bytes(export_dir),
      compat.as_bytes('assets.extra/some/sub/directory/my_extra_file'))
  self.assertTrue(
      gfile.Exists(
          os.path.join(
              compat.as_bytes(export_dir), compat.as_bytes('assets.extra'))))
  self.assertTrue(gfile.Exists(expected_extra_path))
  self.assertEqual(
      compat.as_bytes(_EXTRA_FILE_CONTENT),
      compat.as_bytes(gfile.GFile(expected_extra_path).read()))

  # Clean up.
  gfile.DeleteRecursively(tmpdir)
def testDecodeExampleShapeKeyTensor(self):
  np_image = np.random.rand(2, 3, 1).astype('f')
  np_labels = np.array([[[1], [2], [3]], [[4], [5], [6]]])
  example = example_pb2.Example(features=feature_pb2.Features(
      feature={
          'image': self._EncodedFloatFeature(np_image),
          'image/shape': self._EncodedInt64Feature(np.array(np_image.shape)),
          'labels': self._EncodedInt64Feature(np_labels),
          'labels/shape': self._EncodedInt64Feature(np.array(np_labels.shape)),
      }))
  serialized_example = example.SerializeToString()

  with self.test_session():
    serialized_example = array_ops.reshape(serialized_example, shape=[])
    keys_to_features = {
        'image': parsing_ops.VarLenFeature(dtype=dtypes.float32),
        'image/shape': parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'labels': parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'labels/shape': parsing_ops.VarLenFeature(dtype=dtypes.int64),
    }
    items_to_handlers = {
        'image':
            tfexample_decoder.Tensor('image', shape_keys='image/shape'),
        'labels':
            tfexample_decoder.Tensor('labels', shape_keys='labels/shape'),
    }
    decoder = tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                 items_to_handlers)
    [tf_image, tf_labels] = decoder.decode(serialized_example,
                                           ['image', 'labels'])
    self.assertAllEqual(tf_image.eval(), np_image)
    self.assertAllEqual(tf_labels.eval(), np_labels)
def testBasic(self):
  with session.Session() as sess:
    examples = array_ops.placeholder(dtypes.string, shape=[1])
    feature_to_type = {
        'x': parsing_ops.FixedLenFeature([1], dtypes.float32, 33.0),
        'y': parsing_ops.VarLenFeature(dtypes.string)
    }
    result = parsing_ops.parse_example(examples, feature_to_type)
    parse_example_op = result['x'].op
    config = extract_example_parser_configuration(parse_example_op, sess)
    expected = self.getExpectedConfig(parse_example_op.type)
    self.assertProtoEquals(expected, config)
def testOldStyleReader(self):
  with self.assertRaisesRegex(
      TypeError,
      r"The `reader` argument must return a `Dataset` object. "
      r"`tf.ReaderBase` subclasses are not supported."):
    _ = readers.make_batched_features_dataset(
        file_pattern=self.test_filenames[0],
        batch_size=32,
        features={
            "file": parsing_ops.FixedLenFeature([], dtypes.int64),
            "record": parsing_ops.FixedLenFeature([], dtypes.int64),
            "keywords": parsing_ops.VarLenFeature(dtypes.string),
            "label": parsing_ops.FixedLenFeature([], dtypes.string),
        },
        reader=io_ops.TFRecordReader)