def _test(self, kwargs, expected_values=None, expected_err=None):
    """Runs `parse_single_example` with `kwargs` and verifies the outcome.

    Args:
      kwargs: Keyword arguments forwarded to
        `parsing_ops.parse_single_example`; must contain a "features" entry.
      expected_values: Expected outputs, handed to
        `_compare_output_to_expected`. Only used when `expected_err` is falsy.
      expected_err: Optional indexable whose elements 0 and 1 are forwarded
        to `assertRaisesWithPredicateMatch`. When truthy, building or running
        the parse must raise and no further checks are made.
    """
    with self.cached_session() as sess:
        if expected_err:
            with self.assertRaisesWithPredicateMatch(
                    expected_err[0], expected_err[1]):
                out = parsing_ops.parse_single_example(**kwargs)
                sess.run(flatten_values_tensors_or_sparse(out.values()))
            # Stop here: `out` is unbound when the parse call itself raised,
            # so the shape checks below must never run on the error path.
            return

        # Returns dict w/ Tensors and SparseTensors.
        out = parsing_ops.parse_single_example(**kwargs)

        # Check values.
        tf_result = sess.run(
            flatten_values_tensors_or_sparse(out.values()))
        _compare_output_to_expected(self, out, expected_values, tf_result)

        # Check shapes.
        for k, f in kwargs["features"].items():
            if (isinstance(f, parsing_ops.FixedLenFeature)
                    and f.shape is not None):
                # Dense outputs must carry exactly the declared static shape.
                self.assertEqual(
                    tuple(out[k].get_shape()),
                    tensor_shape.as_shape(f.shape))
            elif isinstance(f, parsing_ops.VarLenFeature):
                # Sparse outputs: check the static shape of each component.
                self.assertEqual(
                    tuple(out[k].indices.get_shape().as_list()), (None, 1))
                self.assertEqual(
                    tuple(out[k].values.get_shape().as_list()), (None,))
                self.assertEqual(
                    tuple(out[k].dense_shape.get_shape().as_list()), (1,))
def _test(self, kwargs, expected_values=None, expected_err=None):
    """Parses a single example from `kwargs` and validates the result.

    Args:
      kwargs: Keyword arguments for `parsing_ops.parse_single_example`;
        the "features" entry drives the shape checks.
      expected_values: Expected outputs for `_compare_output_to_expected`.
      expected_err: Optional indexable; items 0 and 1 are passed to
        `assertRaisesWithPredicateMatch`. When truthy only the failure is
        asserted.
    """
    with self.cached_session() as sess:
        if expected_err:
            err_type = expected_err[0]
            err_predicate = expected_err[1]
            with self.assertRaisesWithPredicateMatch(err_type, err_predicate):
                out = parsing_ops.parse_single_example(**kwargs)
                sess.run(flatten_values_tensors_or_sparse(out.values()))
            return

        # Success path: the parse yields a dict of Tensors / SparseTensors.
        out = parsing_ops.parse_single_example(**kwargs)

        # Parse again with example names supplied so the unfused code path
        # stays covered and is checked to agree with the default one.
        out_with_example_name = parsing_ops.parse_single_example(
            example_names="name", **kwargs)

        for parsed in (out, out_with_example_name):
            flat_tensors = flatten_values_tensors_or_sparse(parsed.values())
            # Compare evaluated values against the expectation.
            evaluated = self.evaluate(flat_tensors)
            _compare_output_to_expected(
                self, parsed, expected_values, evaluated)

        # Static shape checks, performed on the default parse only.
        for key, spec in kwargs["features"].items():
            if (isinstance(spec, parsing_ops.FixedLenFeature)
                    and spec.shape is not None):
                dense_shape = tuple(out[key].get_shape().as_list())
                self.assertEqual(dense_shape, spec.shape)
            elif isinstance(spec, parsing_ops.VarLenFeature):
                sparse = out[key]
                self.assertEqual(
                    tuple(sparse.indices.get_shape().as_list()), (None, 1))
                self.assertEqual(
                    tuple(sparse.values.get_shape().as_list()), (None,))
                self.assertEqual(
                    tuple(sparse.dense_shape.get_shape().as_list()), (1,))
def _test(self, kwargs, expected_values=None, expected_err=None):
    """Runs `parse_single_example` with `kwargs` and checks the result.

    Args:
      kwargs: Keyword arguments forwarded to
        `parsing_ops.parse_single_example`; must include "features".
      expected_values: Expected outputs, passed on to
        `_compare_output_to_expected` when no error is expected.
      expected_err: Optional indexable; elements 0 and 1 go to
        `assertRaisesWithPredicateMatch`. When truthy, the parse (or running
        it) must raise and the value/shape checks are skipped.
    """
    with self.cached_session() as sess:
        if expected_err:
            with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                     expected_err[1]):
                out = parsing_ops.parse_single_example(**kwargs)
                sess.run(flatten_values_tensors_or_sparse(out.values()))
            # Return early: if the parse call raised, `out` was never bound
            # and the shape checks below would fail with a name error.
            return

        # Returns dict w/ Tensors and SparseTensors.
        out = parsing_ops.parse_single_example(**kwargs)

        # Check values.
        tf_result = sess.run(flatten_values_tensors_or_sparse(out.values()))
        _compare_output_to_expected(self, out, expected_values, tf_result)

        # Check shapes.
        for k, f in kwargs["features"].items():
            if (isinstance(f, parsing_ops.FixedLenFeature)
                    and f.shape is not None):
                # Dense feature: static shape must equal the declared shape.
                self.assertEqual(tuple(out[k].get_shape()),
                                 tensor_shape.as_shape(f.shape))
            elif isinstance(f, parsing_ops.VarLenFeature):
                # Sparse feature: verify each component's static shape.
                self.assertEqual(
                    tuple(out[k].indices.get_shape().as_list()), (None, 1))
                self.assertEqual(
                    tuple(out[k].values.get_shape().as_list()), (None,))
                self.assertEqual(
                    tuple(out[k].dense_shape.get_shape().as_list()), (1,))
def parse_single_example(serialized, features, name=None, example_names=None):
    """Parses one serialized `Example` proto into labeled tensors.

    See tf.parse_single_example.

    Args:
      serialized: A scalar string Tensor or LabeledTensor holding a single
        serialized Example.
      features: A `dict` from feature key to a
        `labeled_tensor.FixedLenFeature`; each value's `axes` attribute
        labels the corresponding parsed tensor.
      name: A name for this operation (optional).
      example_names: (Optional) A scalar string Tensor, the associated name.

    Returns:
      A `dict` mapping feature keys to `LabeledTensor` values.

    Raises:
      ValueError: if any feature is invalid.
    """
    labeled_input = core.convert_to_labeled_tensor(serialized)
    plain_features = _labeled_to_unlabeled_features(features)
    plain_parsed = parsing_ops.parse_single_example(
        labeled_input.tensor, plain_features, name, example_names)

    # Re-attach each feature's axes to the raw tensor that was parsed for it.
    return {
        key: core.LabeledTensor(tensor, features[key].axes)
        for key, tensor in plain_parsed.items()
    }
def parse_fn(x):
    """Parses serialized example `x` into "dense_int" and "dense_str"."""
    # Both features are declared with shape `()`.
    int_feature = parsing_ops.FixedLenFeature((), dtypes.int64, 0)
    str_feature = parsing_ops.FixedLenFeature((), dtypes.string, "")
    return parsing_ops.parse_single_example(
        x, {"dense_int": int_feature, "dense_str": str_feature})
def decode(self, serialized_example, items=None):
    """Decodes a serialized TF-example into the requested items.

    Args:
      serialized_example: a serialized TF-example tensor.
      items: the item keys to decode; each must be a key of
        `self._items_to_handlers`. When falsy (e.g. `None`), every key of
        `self._items_to_handlers` is decoded.

    Returns:
      A list with one decoded entry per requested item, in iteration order.
    """
    feature_specs = self._keys_to_features
    example = parsing_ops.parse_single_example(serialized_example,
                                               feature_specs)

    # Dense (fixed-length) features are reshaped once, up front, so every
    # handler sees the declared shape.
    for key, spec in feature_specs.items():
        if isinstance(spec, parsing_ops.FixedLenFeature):
            example[key] = array_ops.reshape(example[key], spec.shape)

    if not items:
        items = self._items_to_handlers.keys()

    def _decode_item(item):
        # Hand the handler only the tensors it declares in `handler.keys`.
        handler = self._items_to_handlers[item]
        needed = {key: example[key] for key in handler.keys}
        return handler.tensors_to_item(needed)

    return [_decode_item(item) for item in items]
def decode(self, serialized_example, items=None):
    """Parses a serialized TF-example and runs the item handlers on it.

    Args:
      serialized_example: a serialized TF-example tensor.
      items: item keys to decode, each present in
        `self._items_to_handlers`. A falsy value (such as `None`) selects
        all keys of `self._items_to_handlers`.

    Returns:
      The decoded items as a list, one entry per requested item.
    """
    specs = self._keys_to_features
    parsed = parsing_ops.parse_single_example(serialized_example, specs)

    # Reshape every fixed-length (non-sparse) feature exactly once.
    for feature_key, feature_spec in specs.items():
        if not isinstance(feature_spec, parsing_ops.FixedLenFeature):
            continue
        parsed[feature_key] = array_ops.reshape(
            parsed[feature_key], feature_spec.shape)

    requested = items if items else self._items_to_handlers.keys()

    decoded = []
    for item in requested:
        handler = self._items_to_handlers[item]
        # Each handler receives only the tensors named in its `keys`.
        handler_inputs = {key: parsed[key] for key in handler.keys}
        decoded.append(handler.tensors_to_item(handler_inputs))
    return decoded
def parse_and_predict(examples):
    """Parses `examples[0]` and runs `model` plus its first layer on it.

    Returns:
      A dict with 'predictions' (output of `model`) and 'layer_1_outputs'
      (output of `model.layers[0]`), both computed from the parsed 'inputs'
      feature.
    """
    parsed = parsing_ops.parse_single_example(examples[0], feature_configs)
    inputs = parsed['inputs']
    predictions = model(inputs)
    first_layer_outputs = model.layers[0](inputs)
    return {
        'predictions': predictions,
        'layer_1_outputs': first_layer_outputs,
    }
def parse_single_example_fn(x):
    """Parses serialized example `x` into two dense and two sparse features."""
    dense_specs = {
        "dense_int": parsing_ops.FixedLenFeature((), dtypes.int64, 0),
        "dense_str": parsing_ops.FixedLenFeature((), dtypes.string, ""),
    }
    sparse_specs = {
        "sparse_int": parsing_ops.VarLenFeature(dtypes.int64),
        "sparse_str": parsing_ops.VarLenFeature(dtypes.string),
    }
    feature_specs = dict(dense_specs)
    feature_specs.update(sparse_specs)
    return parsing_ops.parse_single_example(x, feature_specs)
def parse_fn(serialized):
    """Parses feature "x" and reduces it to a scalar boolean tensor.

    When the parsed values are non-empty they are cast to bool and reshaped
    to a scalar; otherwise a scalar boolean zero is produced.
    """
    feature_spec = {"x": parsing_ops.VarLenFeature(dtypes.int64)}
    example = parsing_ops.parse_single_example(serialized, feature_spec)
    values = example["x"].values
    num_values = array_ops.size(values)
    as_bool = math_ops.cast(values, dtypes.bool)

    def _scalar_value():
        return array_ops.reshape(as_bool, [])

    def _scalar_false():
        return array_ops.zeros([], dtypes.bool)

    return control_flow_ops.cond(num_values > 0, _scalar_value, _scalar_false)
def decode(self, serialized_example, items=None):
    """Decodes the given serialized TF-example.

    Args:
      serialized_example: a serialized TF-example tensor.
      items: Accepted for interface compatibility but not read here; the
        returned tensors always follow `self._items`.

    Returns:
      A list with one tensor per entry of `self._items`, looked up in the
      merged source and target feature dicts (which also contain the derived
      "source_len" and, when a target is present, "target_len").
    """
    example = parsing_ops.parse_single_example(serialized_example,
                                               self._keys_to_features)

    def get_feature_tensor(example, keys_to_features):
        """Returns {key: tensor} for `keys_to_features`, tokenizing strings.

        Note: `example` is updated in place with the reshaped / densified
        tensors.
        """
        features = {}
        for k in keys_to_features:
            v = keys_to_features[k]
            # Reshape non-sparse elements just once:
            if isinstance(v, parsing_ops.FixedLenFeature):
                example[k] = array_ops.reshape(example[k], v.shape)
            # Sparse features are reduced to their flat values.
            if isinstance(example[k], tf.SparseTensor):
                example[k] = example[k].values
            # Anything that is not int64/float32 is split on single spaces
            # into tokens; numeric tensors pass through unchanged.
            if example[k].dtype not in (tf.int64, tf.float32):
                features[k] = tf.string_split(
                    example[k], delimiter=" ").values
            else:
                features[k] = example[k]
        return features

    source_feature_tensors = get_feature_tensor(
        example, self._source_keys_to_tensor)
    source_feature_tensors["source_len"] = tf.size(
        source_feature_tensors["source_tokens"], out_type=tf.int64)

    # Target features are optional: only decode them when the first target
    # key is actually present in the parsed example.
    target_feature_tensors = {}
    if self._target_feature_keys[0] in example:
        target_feature_tensors = get_feature_tensor(
            example, self._target_keys_to_tensor)
        target_feature_tensors["target_len"] = tf.size(
            target_feature_tensors["target_tokens"], out_type=tf.int64)

    all_features = merge_dict(source_feature_tensors, target_feature_tensors)
    return [all_features[v] for v in self._items]
def _parse_single_example(serialized):
    """Parses `serialized` and returns its features as a tuple of tensors.

    The tuple is ordered by sorted feature key; sparse features are
    converted to dense tensors, all others are returned unchanged.
    """
    # NOTE(review): parsing uses `feature_map` while the key order comes from
    # `self._feature_map` — presumably the same mapping; confirm.
    parsed = parsing_ops.parse_single_example(serialized, feature_map)

    def _densify(tensor):
        if isinstance(tensor, sparse_tensor_lib.SparseTensor):
            return tf.sparse_tensor_to_dense(tensor)
        return tensor

    ordered_keys = sorted(self._feature_map.keys())
    return tuple(_densify(parsed[key]) for key in ordered_keys)
def test_keyed_parse_json(self):
    """Reads three JSON examples via `read_keyed_batch_examples`.

    Checks the static shapes of the returned keys and inputs, then that each
    batch yields the expected "age" value and a key of the form
    `<filename>:<n>`, and finally that the input is exhausted after three
    reads.
    """
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file(
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}\n'
    )

    with ops.Graph().as_default() as g, self.test_session(
            graph=g) as session:
        feature_spec = {
            "age": parsing_ops.FixedLenFeature([1], dtypes_lib.int64)
        }

        def parse_fn(example):
            decoded = parsing_ops.decode_json_example(example)
            return parsing_ops.parse_single_example(decoded, feature_spec)

        keys, inputs = graph_io.read_keyed_batch_examples(
            filename,
            1,
            reader=io_ops.TextLineReader,
            randomize_input=False,
            num_epochs=1,
            queue_capacity=5,
            parse_fn=parse_fn,
            name="my_batch")
        self.assertAllEqual((None,), keys.get_shape().as_list())
        self.assertEqual(1, len(inputs))
        self.assertAllEqual((None, 1), inputs["age"].get_shape().as_list())

        session.run(variables.local_variables_initializer())
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        # One batch per record: the age value and the key suffix expected.
        encoded_filename = filename.encode("utf-8")
        expectations = ((0, b":1"), (1, b":2"), (2, b":3"))
        for expected_age, key_suffix in expectations:
            key, age = session.run([keys, inputs["age"]])
            self.assertAllEqual(age, [[expected_age]])
            self.assertAllEqual(key, [encoded_filename + key_suffix])

        # A single epoch was requested, so a fourth read must fail.
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def decode(self, data, items):
    """Parses `data` and returns the tensors named by `items`.

    Args:
      data: A possibly encoded data format, parsed with
        `parse_single_example` against `self.keys_to_features`.
      items: A list of strings, each naming an entry of the parsed example.

    Returns:
      A list of `Tensors` with one entry per element of `items`, in the
      same order.

    Raises:
      ValueError: If any of the items cannot be satisfied.
        NOTE(review): the lookup below is a plain `example[item]`; what it
        raises for a missing item depends on the parsed object — confirm.
    """
    example = parse_single_example(data, self.keys_to_features)
    tensors = []
    for item in items:
        tensors.append(example[item])
    return tensors
def testReadWithEquivalentDataset(self):
    """Checks a hand-built TFRecord pipeline against the expected batches.

    The dataset parses "file" and "record" from each record, repeats 10
    times and batches by 2; every batch must match `_next_expected_batch`,
    after which the dataset must be exhausted.
    """
    features = {
        "file": parsing_ops.FixedLenFeature([], dtypes.int64),
        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
    }

    def _parse(serialized):
        return parsing_ops.parse_single_example(serialized, features)

    dataset = core_readers.TFRecordDataset(self._filenames)
    dataset = dataset.map(_parse)
    dataset = dataset.repeat(10)
    dataset = dataset.batch(2)

    next_element = self.getNext(dataset)
    expected_batches = self._next_expected_batch(
        range(self._num_files), 2, 10)
    for expected in expected_batches:
        # Only the first and fifth fields of each expected tuple are compared.
        file_batch, _, _, _, record_batch, _ = expected
        actual_batch = self.evaluate(next_element())
        self.assertAllEqual(file_batch, actual_batch["file"])
        self.assertAllEqual(record_batch, actual_batch["record"])

    with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element())
def test_keyed_parse_json(self):
    """Verifies keyed batch reading of JSON-encoded examples.

    Three one-line JSON examples are written to a temp file, read one per
    batch through `read_keyed_batch_examples`, and each batch is checked for
    its "age" value and its `<filename>:<n>` key. A further read must raise
    `OutOfRangeError`.
    """
    gfile.Glob = self._orig_glob
    json_lines = (
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}\n')
    filename = self._create_temp_file(json_lines)
    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        age_spec = {"age": parsing_ops.FixedLenFeature([1], dtypes_lib.int64)}

        def parse_fn(example):
            return parsing_ops.parse_single_example(
                parsing_ops.decode_json_example(example), age_spec)

        keys, inputs = graph_io.read_keyed_batch_examples(
            filename,
            batch_size,
            reader=io_ops.TextLineReader,
            randomize_input=False,
            num_epochs=1,
            queue_capacity=queue_capacity,
            parse_fn=parse_fn,
            name=name)

        # Static shapes: batch dimension is unknown, "age" has length 1.
        self.assertAllEqual((None,), keys.get_shape().as_list())
        self.assertEqual(1, len(inputs))
        self.assertAllEqual((None, 1), inputs["age"].get_shape().as_list())

        session.run(variables.local_variables_initializer())
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        fetches = [keys, inputs["age"]]
        file_prefix = filename.encode("utf-8")
        for want_age, want_suffix in [(0, b":1"), (1, b":2"), (2, b":3")]:
            key, age = session.run(fetches)
            self.assertAllEqual(age, [[want_age]])
            self.assertAllEqual(key, [file_prefix + want_suffix])

        # Only one epoch was requested, so the input is now exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def decode(self, data, items):
    """Parses `data` and builds one output tensor per recognized item.

    Args:
      data: Serialized example passed to `parse_single_example` together
        with `self._keys_to_features`.
      items: Iterable of item names. Recognized names are 'image', 'label',
        'uid', 'file_path', 'user_set/labels', 'user_set/images' and
        'user_set/file_paths'; any other name is skipped without producing
        an output.

    Returns:
      A list of tensors for the recognized items, in request order.
    """
    feature_specs = self._keys_to_features
    example = parsing_ops.parse_single_example(data, feature_specs)

    # Fixed-length (non-sparse) features are reshaped once, up front.
    for key, spec in feature_specs.items():
        if isinstance(spec, parsing_ops.FixedLenFeature):
            example[key] = tf.reshape(example[key], spec.shape)

    # Items returned exactly as parsed.
    passthrough_items = ('label', 'uid', 'file_path')

    outputs = []
    for item in items:
        if item == 'image':
            # Raw bytes -> uint8 values arranged as a 32x32 image.
            pixels = parsing_ops.decode_raw(example['image'], tf.uint8)
            outputs.append(tf.reshape(pixels, [32, 32]))
        elif item in passthrough_items:
            outputs.append(example[item])
        elif item == 'user_set/labels':
            sparse_labels = example['user_set/labels']
            dense_labels = tf.sparse_to_dense(
                sparse_labels.indices,
                sparse_labels.dense_shape,
                sparse_labels.values)
            outputs.append(dense_labels)
        elif item == 'user_set/images':
            sparse_labels = example['user_set/labels']
            sparse_images = example['user_set/images']
            # The number of images is taken from the labels tensor.
            num_examples = tf.shape(sparse_labels)[0]
            pixels = parsing_ops.decode_raw(sparse_images.values, tf.uint8)
            outputs.append(tf.reshape(pixels, [num_examples, 32, 32]))
        elif item == 'user_set/file_paths':
            sparse_paths = example['user_set/file_paths']
            dense_paths = tf.sparse_to_dense(
                sparse_paths.indices,
                sparse_paths.dense_shape,
                sparse_paths.values,
                default_value='')
            outputs.append(dense_paths)
    return outputs
def testReadWithEquivalentDataset(self):
    """Compares a manually assembled TFRecord dataset to expected batches.

    Records from `self.test_filenames` are parsed into "file" and "record",
    repeated 10 times and batched by 2. Each produced batch must equal the
    corresponding expected batch, and reading past the end must raise
    `OutOfRangeError`.
    """
    int_scalar = parsing_ops.FixedLenFeature([], dtypes.int64)
    features = {
        "file": int_scalar,
        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
    }

    def _parse_record(record):
        return parsing_ops.parse_single_example(record, features)

    records = core_readers.TFRecordDataset(self.test_filenames)
    dataset = records.map(_parse_record).repeat(10).batch(2)
    next_element = self.getNext(dataset)

    for expected in self._next_expected_batch(range(self._num_files), 2, 10):
        file_batch, _, _, _, record_batch, _ = expected
        actual_batch = self.evaluate(next_element())
        self.assertAllEqual(file_batch, actual_batch["file"])
        self.assertAllEqual(record_batch, actual_batch["record"])

    # All expected batches consumed: the dataset must now be exhausted.
    with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element())
def testReadWithEquivalentDataset(self):
    """Checks a TFRecord map/repeat/batch pipeline against expected batches.

    Uses an initializable iterator inside a test session; after every
    expected batch has matched, one more fetch must raise `OutOfRangeError`.
    """
    features = {
        "file": parsing_ops.FixedLenFeature([], dtypes.int64),
        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
    }

    def _parse(serialized):
        return parsing_ops.parse_single_example(serialized, features)

    dataset = core_readers.TFRecordDataset(self.test_filenames)
    dataset = dataset.map(_parse)
    dataset = dataset.repeat(10)
    dataset = dataset.batch(2)

    iterator = dataset.make_initializable_iterator()
    init_op = iterator.initializer
    next_element = iterator.get_next()

    with self.test_session() as sess:
        sess.run(init_op)
        expected_batches = self._next_expected_batch(
            range(self._num_files), 2, 10)
        for expected in expected_batches:
            # Only the first and last fields of each expected tuple are used.
            file_batch, _, _, _, record_batch = expected
            actual_batch = sess.run(next_element)
            self.assertAllEqual(file_batch, actual_batch["file"])
            self.assertAllEqual(record_batch, actual_batch["record"])
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def testReadWithEquivalentDataset(self):
    """Verifies batches from a hand-built TFRecord dataset.

    Each record is parsed into "file" and "record"; the dataset is repeated
    10 times and batched by 2. Batches are fetched through an initializable
    iterator and compared with `_next_expected_batch`; a final fetch must
    raise `OutOfRangeError`.
    """
    features = {
        "file": parsing_ops.FixedLenFeature([], dtypes.int64),
        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
    }

    def _parse_record(record):
        return parsing_ops.parse_single_example(record, features)

    records = core_readers.TFRecordDataset(self.test_filenames)
    dataset = records.map(_parse_record).repeat(10).batch(2)

    iterator = dataset.make_initializable_iterator()
    init_op = iterator.initializer
    next_element = iterator.get_next()

    with self.test_session() as sess:
        sess.run(init_op)
        for expected in self._next_expected_batch(
                range(self._num_files), 2, 10):
            file_batch, _, _, _, record_batch = expected
            actual_batch = sess.run(next_element)
            self.assertAllEqual(file_batch, actual_batch["file"])
            self.assertAllEqual(record_batch, actual_batch["record"])

        # Nothing left to read once all expected batches were seen.
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def loop_fn(i):
    """Parses the `i`-th entry gathered from `examples` using `features`."""
    return parsing_ops.parse_single_example(
        array_ops.gather(examples, i), features)