def _test(self, kwargs, expected_values=None, expected_err=None):
    with self.test_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          out = parsing_ops.parse_example(**kwargs)
          sess.run(flatten_values_tensors_or_sparse(out.values()))
        return
      else:
        # Returns dict w/ Tensors and SparseTensors.
        out = parsing_ops.parse_example(**kwargs)
        result = flatten_values_tensors_or_sparse(out.values())
        # Check values.
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      serialized = kwargs["serialized"]
      batch_size = (serialized.eval().size if isinstance(serialized, ops.Tensor)
                    else np.asarray(serialized).size)
      for k, f in kwargs["features"].items():
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          self.assertEqual(
              tuple(out[k].get_shape().as_list()), (batch_size,) + f.shape)
        elif isinstance(f, parsing_ops.VarLenFeature):
          self.assertEqual(
              tuple(out[k].indices.get_shape().as_list()), (None, 2))
          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
          self.assertEqual(
              tuple(out[k].dense_shape.get_shape().as_list()), (2,))
def _writeDummySavedModel(self, path, feature_name):
  """Writes a classifier with two input features to the given path."""
  with ops.Graph().as_default():
    examples = array_ops.placeholder(dtypes.string, name="input_node")
    feature_configs = {
        feature_name: parsing_ops.FixedLenFeature(shape=[],
                                                  dtype=dtypes.float32),
    }
    features = parsing_ops.parse_example(examples, feature_configs)
    feature = features[feature_name]

    variable_node = variables.VariableV1(1.0, name="variable_node")
    scores = math_ops.multiply(variable_node, feature, name="output_node")
    class_feature = array_ops.fill(array_ops.shape(feature),
                                   "class_%s" % feature_name)
    classes = array_ops.transpose(class_feature)

    with session.Session() as sess:
      sess.run(variables.global_variables_initializer())
      signature = (
          signature_def_utils.classification_signature_def(
              examples=examples,
              classes=classes,
              scores=scores,))
      builder = saved_model_builder.SavedModelBuilder(path)
      builder.add_meta_graph_and_variables(
          sess,
          [tag_constants.SERVING],
          signature_def_map={
              signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                  signature,
          },)
      builder.save(as_text=True)
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} placeholders[feature_keys.TrainEvalFeatures.TIMES] = ( array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length])) # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros( shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = ( feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder( shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items()} for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least(1).as_list()) feature_shape = ([default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() model_start_state = self._model.get_start_state() for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary( model_start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size,)).concatenate(state_tensor.get_shape()) placeholders[prefixed_state_name] = array_ops.placeholder( name=prefixed_state_name, shape=state_shape_with_batch, dtype=state_tensor.dtype) return export_lib.ServingInputReceiver(placeholders, placeholders)
def _predict_input_fn():
  feature_map = parsing_ops.parse_example(
      input_lib.limit_epochs(serialized_examples, num_epochs=1),
      feature_spec)
  features = _queue_parsed_features(feature_map)
  features.pop('y')
  return features, None
def parse_example(serialized, features, name=None, example_names=None):
  """Parse `Example` protos into a `dict` of labeled tensors.

  See tf.parse_example.

  Args:
    serialized: A 1-D LabeledTensor of strings, a batch of binary serialized
      `Example` protos.
    features: A `dict` mapping feature keys to `labeled_tensor.FixedLenFeature`
      values.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping feature keys to `LabeledTensor` values. The single axis
    from `serialized` will be prepended to the axes provided by each feature.

  Raises:
    ValueError: if any feature is invalid.
  """
  serialized = core.convert_to_labeled_tensor(serialized)
  unlabeled_features = _labeled_to_unlabeled_features(features)

  unlabeled_parsed = parsing_ops.parse_example(
      serialized.tensor, unlabeled_features, name, example_names)

  parsed = {}
  for name, parsed_feature in unlabeled_parsed.items():
    axes = list(serialized.axes.values()) + features[name].axes
    parsed[name] = core.LabeledTensor(parsed_feature, axes)

  return parsed
def _eval_input_fn():
  feature_map = parsing_ops.parse_example(
      input_lib.limit_epochs(serialized_examples, num_epochs=1),
      feature_spec)
  features = linear_testing_utils.queue_parsed_features(feature_map)
  labels = features.pop('y')
  return features, labels
def create_example_parser_from_signatures(signatures, examples_batch,
                                          single_feature_name="feature"):
  """Creates example parser from given signatures.

  Args:
    signatures: Dict of `TensorSignature` objects or single `TensorSignature`.
    examples_batch: string `Tensor` of serialized `Example` proto.
    single_feature_name: string, single feature name.

  Returns:
    features: `Tensor` or `dict` of `Tensor` objects.
  """
  feature_spec = {}
  if not isinstance(signatures, dict):
    feature_spec[single_feature_name] = signatures.get_feature_spec()
  else:
    feature_spec = {key: signatures[key].get_feature_spec()
                    for key in signatures}
  features = parsing_ops.parse_example(examples_batch, feature_spec)
  if not isinstance(signatures, dict):
    # Returns single feature, casts if needed.
    features = features[single_feature_name]
    if not signatures.dtype.is_compatible_with(features.dtype):
      features = math_ops.cast(features, signatures.dtype)
    return features
  # Returns dict of features, casts if needed.
  for name in features:
    if not signatures[name].dtype.is_compatible_with(features[name].dtype):
      features[name] = math_ops.cast(features[name], signatures[name].dtype)
  return features
def serving_input_receiver_fn():
  """An input_fn that expects a serialized tf.Example."""
  serialized_tf_example = array_ops.placeholder(dtype=dtypes.string,
                                                shape=[default_batch_size],
                                                name='input_example_tensor')
  receiver_tensors = {'examples': serialized_tf_example}
  features = parsing_ops.parse_example(serialized_tf_example, feature_spec)
  return ServingInputReceiver(features, receiver_tensors)
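# A minimal usage sketch for a receiver like serving_input_receiver_fn above,
# assuming TF 1.x. The feature spec, placeholder shape, and export path are
# hypothetical and only illustrate how such a function is typically wired into
# Estimator.export_savedmodel.
import tensorflow as tf

example_feature_spec = {  # hypothetical spec for illustration
    'age': tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
    'query': tf.VarLenFeature(dtype=tf.string),
}

def example_serving_input_receiver_fn():
  serialized = tf.placeholder(dtype=tf.string, shape=[None],
                              name='input_example_tensor')
  features = tf.parse_example(serialized, example_feature_spec)
  return tf.estimator.export.ServingInputReceiver(
      features, {'examples': serialized})

# estimator.export_savedmodel('/tmp/export', example_serving_input_receiver_fn)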
def read_batch_features(file_pattern, batch_size, features, reader,
                        randomize_input=True, num_epochs=None,
                        queue_capacity=10000, reader_num_threads=1,
                        parser_num_threads=1, name=None):
  """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will setup a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
      `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If
      specified, creates a variable that must be initialized, so call
      tf.initialize_all_variables() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    parser_num_threads: The number of threads to parse examples.
    name: Name of resulting op.

  Returns:
    A dict of `Tensor` or `SparseTensor` objects for each in `features`.

  Raises:
    ValueError: for invalid inputs.
  """
  with ops.op_scope([file_pattern], name, 'read_batch_features') as scope:
    examples = read_batch_examples(
        file_pattern, batch_size, reader, randomize_input=randomize_input,
        num_epochs=num_epochs, queue_capacity=queue_capacity,
        num_threads=reader_num_threads, name=scope)

    # Parse features into tensors in many threads and put on the queue.
    features_list = []
    for _ in range(parser_num_threads):
      features_list.append(parsing_ops.parse_example(examples, features))
    return input_ops.batch_join(
        features_list,
        batch_size=batch_size,
        capacity=queue_capacity,
        enqueue_many=True,
        name='parse_example_batch_join')
def input_fn():
  """An input_fn that expects a serialized tf.Example."""
  serialized_tf_example = array_ops.placeholder(dtype=dtypes.string,
                                                shape=[default_batch_size],
                                                name='input_example_tensor')
  inputs = {'examples': serialized_tf_example}
  features = parsing_ops.parse_example(serialized_tf_example, feature_spec)
  labels = None  # these are not known in serving!
  return InputFnOps(features, labels, inputs)
def parse_feature_columns_from_examples(serialized, feature_columns, name=None, example_names=None): """Parses tf.Examples to extract tensors for given feature_columns. This is a wrapper of 'tf.parse_example'. A typical usage is as follows: ```python columns_to_tensor = parse_feature_columns_from_examples( serialized=my_data, feature_columns=my_features) # Where my_features are: # Define features and transformations country = sparse_column_with_keys(column_name="native_country", keys=["US", "BRA", ...]) country_emb = embedding_column(sparse_id_column=country, dimension=3, combiner="sum") occupation = sparse_column_with_hash_bucket(column_name="occupation", hash_bucket_size=1000) occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16, combiner="sum") occupation_x_country = crossed_column(columns=[occupation, country], hash_bucket_size=10000) age = real_valued_column("age") age_buckets = bucketized_column( source_column=age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) my_features = [occupation_emb, age_buckets, country_emb] ``` Args: serialized: A vector (1-D Tensor) of strings, a batch of binary serialized `Example` protos. feature_columns: An iterable containing all the feature columns. All items should be instances of classes derived from _FeatureColumn. name: A name for this operation (optional). example_names: A vector (1-D Tensor) of strings (optional), the names of the serialized protos in the batch. Returns: A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values. """ check_feature_columns(feature_columns) columns_to_tensors = parsing_ops.parse_example( serialized=serialized, features=fc.create_feature_spec_for_parsing(feature_columns), name=name, example_names=example_names) transformer = _Transformer(columns_to_tensors) for column in sorted(set(feature_columns), key=lambda x: x.key): transformer.transform(column) return columns_to_tensors
def _parse_example(serialized, features):
  parsed = parsing_ops.parse_example(serialized, features)
  result = []
  for key in sorted(features.keys()):
    val = parsed[key]
    if isinstance(val, sparse_tensor_lib.SparseTensor):
      result.extend([val.indices, val.values, val.dense_shape])
    else:
      result.append(val)
  return tuple(result)
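# Hypothetical counterpart to _parse_example above: rebuilds the feature dict
# from the flattened tuple, assuming the same sorted key order and that each
# VarLenFeature was expanded into (indices, values, dense_shape). This is a
# sketch, not part of the original code.
def _unflatten_parsed(flat, features):
  result = {}
  index = 0
  for key in sorted(features.keys()):
    if isinstance(features[key], parsing_ops.VarLenFeature):
      result[key] = sparse_tensor_lib.SparseTensor(
          indices=flat[index], values=flat[index + 1],
          dense_shape=flat[index + 2])
      index += 3
    else:
      result[key] = flat[index]
      index += 1
  return result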
def _ReadAndCheckRowsUsingFeatures(self, num_rows):
  self.server.handler.num_rows = num_rows

  with self.test_session() as sess:
    feature_configs = {
        "int64_col":
            parsing_ops.FixedLenFeature([1], dtype=dtypes.int64),
        "string_col":
            parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.string, default_value="s_default"),
    }
    reader = cloud.BigQueryReader(
        project_id=_PROJECT,
        dataset_id=_DATASET,
        table_id=_TABLE,
        num_partitions=4,
        features=feature_configs,
        timestamp_millis=1,
        test_end_point=("%s:%s" % (self.server.httpd.server_address[0],
                                   self.server.httpd.server_address[1])))

    key, value = _SetUpQueue(reader)

    seen_rows = []
    features = parsing_ops.parse_example(
        array_ops.reshape(value, [1]), feature_configs)
    for _ in range(num_rows):
      int_value, str_value = sess.run(
          [features["int64_col"], features["string_col"]])

      # Parse values returned from the session.
      self.assertEqual(int_value.shape, (1, 1))
      self.assertEqual(str_value.shape, (1, 1))
      int64_col = int_value[0][0]
      string_col = str_value[0][0]
      seen_rows.append(int64_col)

      # Compare.
      expected_row = _ROWS[int64_col]
      self.assertEqual(int64_col, expected_row[0])
      self.assertEqual(
          compat.as_str(string_col),
          ("s_%d" % int64_col) if expected_row[1] else "s_default")

    self.assertItemsEqual(seen_rows, range(num_rows))

    with self.assertRaisesOpError("is closed and has insufficient elements "
                                  "\\(requested 1, current size 0\\)"):
      sess.run([key, value])
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} time_placeholder = array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length]) placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros( shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = ( feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder( shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items()} for feature_key, (batch_only_feature_shape, value_dtype) in (exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least(1).as_list()) feature_shape = ([default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) batch_size_tensor = array_ops.shape(time_placeholder)[0] placeholders.update( self._model_start_state_placeholders( batch_size_tensor, static_batch_size=default_batch_size)) return export_lib.ServingInputReceiver(placeholders, placeholders)
def testBasic(self):
  golden_config = example_parser_configuration_pb2.ExampleParserConfiguration()
  text_format.Parse(BASIC_PROTO, golden_config)
  with session.Session() as sess:
    examples = array_ops.placeholder(dtypes.string, shape=[1])
    feature_to_type = {
        'x': parsing_ops.FixedLenFeature([1], dtypes.float32, 33.0),
        'y': parsing_ops.VarLenFeature(dtypes.string)
    }
    _ = parsing_ops.parse_example(examples, feature_to_type)
    parse_example_op = sess.graph.get_operation_by_name(
        'ParseExample/ParseExample')
    config = extract_example_parser_configuration(parse_example_op, sess)
    self.assertProtoEquals(golden_config, config)
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'. A typical usage is as follows:

  ```
  columns_to_tensor = tf.contrib.layers.parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys("country", ["US", "BRA", ...])
  country_embedding = embedding_column(country, dimension=3, combiner="sum")
  query_word = sparse_column_with_hash_bucket(
      "query_word", hash_bucket_size=int(1e6))
  query_embedding = embedding_column(query_word, dimension=16, combiner="sum")
  age_bucket = bucketized_column(real_valued_column("age"),
                                 boundaries=[18+i*5 for i in range(10)])

  my_features = [query_embedding, age_bucket, country_embedding]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
def parse_examples(example_protos):
  features = {
      'target':
          parsing_ops.FixedLenFeature(
              shape=[1], dtype=dtypes.float32, default_value=0),
      'age_indices':
          parsing_ops.VarLenFeature(dtype=dtypes.int64),
      'age_values':
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
      'gender_indices':
          parsing_ops.VarLenFeature(dtype=dtypes.int64),
      'gender_values':
          parsing_ops.VarLenFeature(dtype=dtypes.float32)
  }
  return parsing_ops.parse_example(
      [e.SerializeToString() for e in example_protos], features)
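# A self-contained sketch of building Example protos to feed parse_examples
# above; the field values are made up for illustration, and the helper name
# is hypothetical.
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2

def make_example(target, age_index, age_value, gender_index, gender_value):
  return example_pb2.Example(features=feature_pb2.Features(feature={
      'target': feature_pb2.Feature(
          float_list=feature_pb2.FloatList(value=[target])),
      'age_indices': feature_pb2.Feature(
          int64_list=feature_pb2.Int64List(value=[age_index])),
      'age_values': feature_pb2.Feature(
          float_list=feature_pb2.FloatList(value=[age_value])),
      'gender_indices': feature_pb2.Feature(
          int64_list=feature_pb2.Int64List(value=[gender_index])),
      'gender_values': feature_pb2.Feature(
          float_list=feature_pb2.FloatList(value=[gender_value])),
  }))

# examples = parse_examples([make_example(1.0, 0, 1.0, 0, 1.0)])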
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" times_column = feature_column.numeric_column( key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64) values_column = feature_column.numeric_column( key=feature_keys.TrainEvalFeatures.VALUES, dtype=values_input_dtype, shape=(self._model.num_features,)) parsed_features_no_sequence = ( feature_column.make_parse_example_spec( list(self._model.exogenous_feature_columns) + [times_column, values_column])) parsed_features = {} for key, feature_spec in parsed_features_no_sequence.items(): if isinstance(feature_spec, parsing_ops.FixedLenFeature): if key == feature_keys.TrainEvalFeatures.VALUES: parsed_features[key] = feature_spec._replace( shape=((values_proto_length,) + feature_spec.shape)) else: parsed_features[key] = feature_spec._replace( shape=((filtering_length + prediction_length,) + feature_spec.shape)) elif feature_spec.dtype == dtypes.string: parsed_features[key] = parsing_ops.FixedLenFeature( shape=(filtering_length + prediction_length,), dtype=dtypes.string) else: # VarLenFeature raise ValueError("VarLenFeatures not supported, got %s for key %s" % (feature_spec, key)) tfexamples = array_ops.placeholder( shape=[default_batch_size], dtype=dtypes.string, name="input") features = parsing_ops.parse_example( serialized=tfexamples, features=parsed_features) features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze( features[feature_keys.TrainEvalFeatures.TIMES], axis=-1) features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast( features[feature_keys.TrainEvalFeatures.VALUES], dtype=self._model.dtype)[:, :filtering_length] features.update( self._model_start_state_placeholders( batch_size_tensor=array_ops.shape( features[feature_keys.TrainEvalFeatures.TIMES])[0], static_batch_size=default_batch_size)) return export_lib.ServingInputReceiver( features, {"examples": tfexamples})
def _get_exogenous_embedding_shape(self):
  """Computes the shape of the vector returned by _process_exogenous_features.

  Returns:
    The shape as a list. Does not include a batch dimension.
  """
  if not self._exogenous_feature_columns:
    return (0,)
  with ops.Graph().as_default():
    parsed_features = (
        feature_column.make_parse_example_spec(
            self._exogenous_feature_columns))
    placeholder_features = parsing_ops.parse_example(
        serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string),
        features=parsed_features)
    embedded = feature_column.input_layer(
        features=placeholder_features,
        feature_columns=self._exogenous_feature_columns)
    return embedded.get_shape().as_list()[1:]
def create_example_parser_from_signatures(signatures, examples_batch,
                                          single_feature_name="feature"):
  """Creates example parser from given signatures.

  Args:
    signatures: Dict of `TensorSignature` objects or single `TensorSignature`.
    examples_batch: string `Tensor` of serialized `Example` proto.
    single_feature_name: string, single feature name.

  Returns:
    features: `Tensor` or `dict` of `Tensor` objects.
  """
  feature_spec = {}
  if not isinstance(signatures, dict):
    feature_spec[single_feature_name] = signatures.get_feature_spec()
  else:
    feature_spec = {key: signatures[key].get_feature_spec()
                    for key in signatures}
  return parsing_ops.parse_example(examples_batch, feature_spec)
def read_batch_features(file_pattern, batch_size, features, reader,
                        randomize_input=True, queue_capacity=10000,
                        num_threads=1, name='dequeue_examples'):
  """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will setup a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
      `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    queue_capacity: Capacity for input queue.
    num_threads: The number of threads enqueuing examples.
    name: Name of resulting op.

  Returns:
    A dict of `Tensor` or `SparseTensor` objects for each in `features`.

  Raises:
    ValueError: for invalid inputs.
  """
  examples = read_batch_examples(
      file_pattern, batch_size, reader, randomize_input,
      queue_capacity, num_threads, name=name)

  # Parse features into tensors.
  return parsing_ops.parse_example(examples, features)
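# A minimal sketch of calling read_batch_features above with a record reader.
# The file pattern and feature spec are placeholders, and the io_ops alias for
# tf.TFRecordReader is an assumption for illustration.
example_features = read_batch_features(
    file_pattern='/tmp/data/*.tfrecord',  # hypothetical path
    batch_size=32,
    features={
        'age': parsing_ops.FixedLenFeature([1], dtype=dtypes.int64),
        'kws': parsing_ops.VarLenFeature(dtype=dtypes.string),
    },
    reader=io_ops.TFRecordReader)
# example_features is a dict of batched Tensors / SparseTensors; queue runners
# must be started (e.g. tf.train.start_queue_runners) before evaluating them.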
def test_parse_single_example(self):

  def _int64_feature(*values):
    return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=values))

  def _bytes_feature(*values):
    return feature_pb2.Feature(
        bytes_list=feature_pb2.BytesList(
            value=[v.encode("utf-8") for v in values]))

  examples = constant_op.constant([
      example_pb2.Example(
          features=feature_pb2.Features(
              feature={
                  "dense_int": _int64_feature(i),
                  "dense_str": _bytes_feature(str(i)),
                  "sparse_int": _int64_feature(i, i * 2, i * 4, i * 8),
                  "sparse_str": _bytes_feature(*["abc"] * i)
              })).SerializeToString() for i in range(10)
  ])

  features = {
      "dense_int": parsing_ops.FixedLenFeature((), dtypes.int64, 0),
      "dense_str": parsing_ops.FixedLenFeature((), dtypes.string, ""),
      "sparse_int": parsing_ops.VarLenFeature(dtypes.int64),
      "sparse_str": parsing_ops.VarLenFeature(dtypes.string),
  }

  def loop_fn(i):
    example_proto = array_ops.gather(examples, i)
    f = parsing_ops.parse_single_example(example_proto, features)
    return f

  pfor = pfor_control_flow_ops.pfor(loop_fn, iters=10)
  manual = parsing_ops.parse_example(examples, features)
  self.run_and_assert_equal(pfor, manual)
def _writeDummySavedModel(self, path, feature_name, tags):
  """Writes a classifier with two input features to the given path."""
  with ops.Graph().as_default():
    examples = array_ops.placeholder(dtypes.string, name="input_node")
    feature_configs = {
        feature_name: parsing_ops.FixedLenFeature(shape=[],
                                                  dtype=dtypes.float32),
    }
    features = parsing_ops.parse_example(examples, feature_configs)
    feature = features[feature_name]

    variable_node = variables.VariableV1(1.0, name="variable_node")
    scores = math_ops.multiply(variable_node, feature, name="output_node")
    class_feature = array_ops.fill(array_ops.shape(feature),
                                   "class_%s" % feature_name)
    classes = array_ops.transpose(class_feature)

    with session.Session() as sess:
      sess.run(variables.global_variables_initializer())
      signature = (signature_def_utils.classification_signature_def(
          examples=examples,
          classes=classes,
          scores=scores,
      ))
      builder = saved_model_builder.SavedModelBuilder(path)
      builder.add_meta_graph_and_variables(
          sess,
          tags,
          signature_def_map={
              signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                  signature,
          },
      )
      builder.save(as_text=True)
def read_keyed_batch_features(file_pattern, batch_size, features, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, reader_num_threads=1, parser_num_threads=1, name=None): """Adds operations to read, queue, batch and parse `Example` protos. Given file pattern (or list of files), will setup a queue for file names, read `Example` proto using provided `reader`, use batch queue to create batches of examples of size `batch_size` and parse example given `features` specification. All queue runners are added to the queue runners collection, and may be started via `start_queue_runners`. All ops are added to the default graph. Args: file_pattern: List of files or pattern of file paths containing `Example` records. See `tf.gfile.Glob` for pattern rules. batch_size: An int or scalar `Tensor` specifying the batch size to use. features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. reader: A function or class that returns an object with `read` method, (filename tensor) -> (example tensor). randomize_input: Whether the input should be randomized. num_epochs: Integer specifying the number of times to read through the dataset. If None, cycles through the dataset forever. NOTE - If specified, creates a variable that must be initialized, so call tf.initialize_local_variables() as shown in the tests. queue_capacity: Capacity for input queue. reader_num_threads: The number of threads to read examples. parser_num_threads: The number of threads to parse examples. name: Name of resulting op. Returns: A dict of `Tensor` or `SparseTensor` objects for each in `features`. If `keep_keys` is `True`, returns tuple of string `Tensor` and above dict. Raises: ValueError: for invalid inputs. """ with ops.op_scope([file_pattern], name, 'read_batch_features') as scope: keys, examples = read_keyed_batch_examples( file_pattern, batch_size, reader, randomize_input=randomize_input, num_epochs=num_epochs, queue_capacity=queue_capacity, num_threads=reader_num_threads, read_batch_size=batch_size, name=scope) if parser_num_threads == 1: # Avoid queue overhead for single thread return keys, parsing_ops.parse_example(examples, features) # Parse features into tensors in many threads and put on the queue. features_list = [] for _ in range(parser_num_threads): feature_dict = parsing_ops.parse_example(examples, features) feature_dict[KEY_FEATURE_NAME] = keys features_list.append(feature_dict) queued_features = input_ops.batch_join(features_list, batch_size=batch_size, capacity=queue_capacity, enqueue_many=True, name='parse_example_batch_join') queued_keys = queued_features.pop(KEY_FEATURE_NAME) return queued_keys, queued_features
def make_batched_features_dataset(file_pattern, batch_size, features, reader=core_readers.TFRecordDataset, reader_args=None, num_epochs=None, shuffle=True, shuffle_buffer_size=10000, shuffle_seed=None, prefetch_buffer_size=1, reader_num_threads=1, parser_num_threads=2, sloppy_ordering=False, drop_final_batch=False): """Returns a `Dataset` of feature dictionaries from `Example` protos. Example: ``` serialized_examples = [ features { feature { key: "age" value { int64_list { value: [ 0 ] } } } feature { key: "gender" value { bytes_list { value: [ "f" ] } } } feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } } }, features { feature { key: "age" value { int64_list { value: [] } } } feature { key: "gender" value { bytes_list { value: [ "f" ] } } } feature { key: "kws" value { bytes_list { value: [ "sports" ] } } } } ] ``` We can use arguments: ``` features: { "age": FixedLenFeature([], dtype=tf.int64, default_value=-1), "gender": FixedLenFeature([], dtype=tf.string), "kws": VarLenFeature(dtype=tf.string), } ``` And the expected output is: ```python { "age": [[0], [-1]], "gender": [["f"], ["f"]], "kws": SparseTensor( indices=[[0, 0], [0, 1], [1, 0]], values=["code", "art", "sports"] dense_shape=[2, 2]), } ``` Args: file_pattern: List of files or patterns of file paths containing `Example` records. See `tf.gfile.Glob` for pattern rules. batch_size: An int representing the number of records to combine in a single batch. features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. See `tf.parse_example`. reader: A function or class that can be called with a `filenames` tensor and (optional) `reader_args` and returns a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`. reader_args: Additional arguments to pass to the reader class. num_epochs: Integer specifying the number of times to read through the dataset. If None, cycles through the dataset forever. Defaults to `None`. shuffle: A boolean, indicates whether the input should be shuffled. Defaults to `True`. shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity ensures better shuffling but would increase memory usage and startup time. shuffle_seed: Randomization seed to use for shuffling. prefetch_buffer_size: Number of feature batches to prefetch in order to improve performance. Recommended value is the number of batches consumed per training step (default is 1). reader_num_threads: Number of threads used to read `Example` records. If >1, the results will be interleaved. parser_num_threads: Number of threads to use for parsing `Example` tensors into a dictionary of `Feature` tensors. sloppy_ordering: If `True`, reading performance will be improved at the cost of non-deterministic ordering. If `False`, the order of elements produced is deterministic prior to shuffling (elements are still randomized if `shuffle=True`. Note that if the seed is set, then order of elements after shuffling is deterministic). Defaults to `False`. drop_final_batch: If `True`, and the batch size does not evenly divide the input dataset size, the final smaller batch will be dropped. Defaults to `False`. Returns: A dataset of `dict` elements. Each `dict` maps feature keys to `Tensor` or `SparseTensor` objects. """ # Create dataset of all matching filenames filenames = _get_file_names(file_pattern, False) dataset = dataset_ops.Dataset.from_tensor_slices(filenames) if shuffle: dataset = dataset.shuffle(len(filenames), shuffle_seed) # Read `Example` records from files as tensor objects. 
  if reader_args is None:
    reader_args = []

  # Read files sequentially (if reader_num_threads=1) or in parallel
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          lambda filename: reader(filename, *reader_args),
          cycle_length=reader_num_threads,
          sloppy=sloppy_ordering))

  # Extract values if the `Example` tensors are stored as key-value tuples.
  if dataset.output_types == (dtypes.string, dtypes.string):
    dataset = dataset.map(lambda _, v: v)

  # Apply dataset repeat and shuffle transformations.
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  if drop_final_batch:
    dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size))
  else:
    dataset = dataset.batch(batch_size)

  # Parse `Example` tensors to a dictionary of `Feature` tensors.
  dataset = dataset.map(
      lambda x: parsing_ops.parse_example(x, features),
      num_parallel_calls=parser_num_threads)

  # TODO(rachelim): Add an optional label_name argument for extracting the
  # label from the features dictionary, to comply with the type expected by
  # the input_fn to a `tf.Estimator.train` or `tf.Estimator.evaluate` function.
  dataset = dataset.prefetch(prefetch_buffer_size)
  return dataset
def _get_feature_ops_from_example(self, examples_batch):
  column_types = layers.create_feature_spec_for_parsing(
      (self._get_linear_feature_columns() or []) +
      (self._get_dnn_feature_columns() or []))
  features = parsing_ops.parse_example(examples_batch, column_types)
  return features
def _train_input_fn():
  feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
  _, features = graph_io.queue_parsed_features(feature_map)
  labels = features.pop('y')
  return features, labels
def read_batch_features(file_pattern, batch_size, features, reader, reader_args=None, randomize_input=True, num_epochs=None, capacity=10000): """Reads batches of Examples. Example: ``` serialized_examples = [ features { feature { key: "age" value { int64_list { value: [ 0 ] } } } feature { key: "gender" value { bytes_list { value: [ "f" ] } } } feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } } }, features { feature { key: "age" value { int64_list { value: [] } } } feature { key: "gender" value { bytes_list { value: [ "f" ] } } } feature { key: "kws" value { bytes_list { value: [ "sports" ] } } } } ] ``` We can use arguments: ``` features: { "age": FixedLenFeature([], dtype=tf.int64, default_value=-1), "gender": FixedLenFeature([], dtype=tf.string), "kws": VarLenFeature(dtype=tf.string), } ``` And the expected output is: ```python { "age": [[0], [-1]], "gender": [["f"], ["f"]], "kws": SparseTensor( indices=[[0, 0], [0, 1], [1, 0]], values=["code", "art", "sports"] dense_shape=[2, 2]), } ``` Args: file_pattern: List of files or patterns of file paths containing `Example` records. See `tf.gfile.Glob` for pattern rules. batch_size: An int representing the number of consecutive elements of this dataset to combine in a single batch. features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. See `tf.parse_example`. reader: A function or class that can be called with a `filenames` tensor and (optional) `reader_args` and returns a `Dataset` of Examples. reader_args: Additional arguments to pass to the reader class. randomize_input: Whether the input should be randomized. num_epochs: Integer specifying the number of times to read through the dataset. If None, cycles through the dataset forever. capacity: Capacity of the ShuffleDataset. A large capacity ensures better shuffling but would increase memory usage and startup time. Returns: A dict from keys in features to Tensor or SparseTensor objects. """ filenames = _get_file_names(file_pattern, randomize_input) if reader_args: dataset = reader(filenames, *reader_args) else: dataset = reader(filenames) if dataset.output_types == (dtypes.string, dtypes.string): dataset = dataset.map(lambda _, v: v) if num_epochs != 1: dataset = dataset.repeat(num_epochs) if randomize_input: dataset = dataset.shuffle(capacity) dataset = dataset.batch(batch_size) dataset = dataset.map(lambda x: parsing_ops.parse_example(x, features)) iterator = dataset.make_one_shot_iterator() outputs = iterator.get_next() return outputs
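# Sketch of how the dataset-based read_batch_features above might be called;
# the file pattern and feature spec are placeholders, and using
# core_readers.TFRecordDataset as the reader is an assumption for
# illustration.
parsed_batch = read_batch_features(
    file_pattern='/tmp/train-*.tfrecord',  # hypothetical path
    batch_size=32,
    features={
        'age': parsing_ops.FixedLenFeature([], dtype=dtypes.int64,
                                           default_value=-1),
        'gender': parsing_ops.FixedLenFeature([], dtype=dtypes.string),
        'kws': parsing_ops.VarLenFeature(dtype=dtypes.string),
    },
    reader=core_readers.TFRecordDataset)
# parsed_batch maps each feature key to a Tensor or SparseTensor drawn from a
# one-shot iterator over the batched, parsed dataset.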
def parse_feature_columns_from_examples(serialized, feature_columns, name=None, example_names=None): """Parses tf.Examples to extract tensors for given feature_columns. This is a wrapper of 'tf.parse_example'. Example: ```python columns_to_tensor = parse_feature_columns_from_examples( serialized=my_data, feature_columns=my_features) # Where my_features are: # Define features and transformations sparse_feature_a = sparse_column_with_keys( column_name="sparse_feature_a", keys=["AB", "CD", ...]) embedding_feature_a = embedding_column( sparse_id_column=sparse_feature_a, dimension=3, combiner="sum") sparse_feature_b = sparse_column_with_hash_bucket( column_name="sparse_feature_b", hash_bucket_size=1000) embedding_feature_b = embedding_column( sparse_id_column=sparse_feature_b, dimension=16, combiner="sum") crossed_feature_a_x_b = crossed_column( columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000) real_feature = real_valued_column("real_feature") real_feature_buckets = bucketized_column( source_column=real_feature, boundaries=[...]) my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a] ``` Args: serialized: A vector (1-D Tensor) of strings, a batch of binary serialized `Example` protos. feature_columns: An iterable containing all the feature columns. All items should be instances of classes derived from _FeatureColumn. name: A name for this operation (optional). example_names: A vector (1-D Tensor) of strings (optional), the names of the serialized protos in the batch. Returns: A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values. """ check_feature_columns(feature_columns) columns_to_tensors = parsing_ops.parse_example( serialized=serialized, features=fc.create_feature_spec_for_parsing(feature_columns), name=name, example_names=example_names) transformer = _Transformer(columns_to_tensors) for column in sorted(set(feature_columns), key=lambda x: x.key): transformer.transform(column) return columns_to_tensors
def filter_fn(keys, examples_json):
  del keys
  serialized = parsing_ops.decode_json_example(examples_json)
  examples = parsing_ops.parse_example(serialized, features)
  return math_ops.less(examples["age"], 2)
def read_keyed_batch_features(file_pattern, batch_size, features, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, reader_num_threads=1, feature_queue_capacity=100, num_queue_runners=2, parser_num_threads=None, name=None): """Adds operations to read, queue, batch and parse `Example` protos. Given file pattern (or list of files), will setup a queue for file names, read `Example` proto using provided `reader`, use batch queue to create batches of examples of size `batch_size` and parse example given `features` specification. All queue runners are added to the queue runners collection, and may be started via `start_queue_runners`. All ops are added to the default graph. Args: file_pattern: List of files or pattern of file paths containing `Example` records. See `tf.gfile.Glob` for pattern rules. batch_size: An int or scalar `Tensor` specifying the batch size to use. features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. reader: A function or class that returns an object with `read` method, (filename tensor) -> (example tensor). randomize_input: Whether the input should be randomized. num_epochs: Integer specifying the number of times to read through the dataset. If None, cycles through the dataset forever. NOTE - If specified, creates a variable that must be initialized, so call tf.initialize_local_variables() as shown in the tests. queue_capacity: Capacity for input queue. reader_num_threads: The number of threads to read examples. feature_queue_capacity: Capacity of the parsed features queue. num_queue_runners: Number of queue runners to start for the feature queue, Adding multiple queue runners for the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than parsing. parser_num_threads: (Deprecated) The number of threads to parse examples. name: Name of resulting op. Returns: Returns tuple of: - `Tensor` of string keys. - A dict of `Tensor` or `SparseTensor` objects for each in `features`. Raises: ValueError: for invalid inputs. """ if parser_num_threads: # TODO(sibyl-Aix6ihai): Remove on Sept 3 2016. logging.warning( 'parser_num_threads is deprecated, it will be removed on' 'Sept 3 2016') with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope: keys, examples = read_keyed_batch_examples( file_pattern, batch_size, reader, randomize_input=randomize_input, num_epochs=num_epochs, queue_capacity=queue_capacity, num_threads=reader_num_threads, read_batch_size=batch_size, name=scope) # Parse the example. feature_map = parsing_ops.parse_example(examples, features) # Lets also add preprocessed tensors into the queue types for each item of # the queue. tensors_to_enqueue = [] # Each entry contains the key, and a boolean which indicates whether the # tensor was a sparse tensor. tensors_mapping = [] # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse # tensors into a queue. This could be taken care in somewhere else so others # can reuse it. Also, QueueBase maybe extended to handle sparse tensors # directly. 
    for key in sorted(feature_map.keys()):
      tensor = feature_map[key]
      if isinstance(tensor, ops.SparseTensor):
        tensors_mapping.append((key, True))
        tensors_to_enqueue.extend(
            [tensor.indices, tensor.values, tensor.shape])
      else:
        tensors_mapping.append((key, False))
        tensors_to_enqueue.append(tensor)

    tensors_to_enqueue.append(keys)
    queue_dtypes = [x.dtype for x in tensors_to_enqueue]
    input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity, queue_dtypes)

    # Add a summary op to debug if our feature queue is full or not.
    logging_ops.scalar_summary(
        'queue/parsed_features/%s/fraction_of_%d_full' %
        (input_queue.name, feature_queue_capacity),
        math_ops.cast(input_queue.size(), dtypes.float32) *
        (1. / feature_queue_capacity))

    # Add multiple queue runners so that the queue is always full. Adding more
    # than two queue-runners may hog the cpu on the worker to fill up the
    # queue.
    for _ in range(num_queue_runners):
      queue_runner.add_queue_runner(
          queue_runner.QueueRunner(
              input_queue, [input_queue.enqueue(tensors_to_enqueue)]))

    dequeued_tensors = input_queue.dequeue()

    # Reset shapes on dequeued tensors.
    for i in range(len(tensors_to_enqueue)):
      dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape())

    # Recreate feature mapping according to the original dictionary.
    dequeued_feature_map = {}
    index = 0
    for key, is_sparse_tensor in tensors_mapping:
      if is_sparse_tensor:
        # Three tensors are (indices, values, shape).
        dequeued_feature_map[key] = ops.SparseTensor(
            dequeued_tensors[index], dequeued_tensors[index + 1],
            dequeued_tensors[index + 2])
        index += 3
      else:
        dequeued_feature_map[key] = dequeued_tensors[index]
        index += 1
    dequeued_keys = dequeued_tensors[-1]
    return dequeued_keys, dequeued_feature_map
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} time_placeholder = array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length]) placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros( shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = ( feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder( shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items()} for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least(1).as_list()) feature_shape = ([default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() # Evaluate the initial state as same-dtype "zero" values. These zero # constants aren't used, but are necessary for feeding to # placeholder_with_default for the "cold start" case where state is not # fed to the model. def _zeros_like_constant(tensor): return tensor_util.constant_value(array_ops.zeros_like(tensor)) start_state = nest.map_structure( _zeros_like_constant, self._model.get_start_state()) batch_size_tensor = array_ops.shape(time_placeholder)[0] for prefixed_state_name, state in ts_head_lib.state_to_dictionary( start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size,)).concatenate(state.shape) default_state_broadcast = array_ops.tile( state[None, ...], multiples=array_ops.concat( [batch_size_tensor[None], array_ops.ones(len(state.shape), dtype=dtypes.int32)], axis=0)) placeholders[prefixed_state_name] = array_ops.placeholder_with_default( input=default_state_broadcast, name=prefixed_state_name, shape=state_shape_with_batch) return export_lib.ServingInputReceiver(placeholders, placeholders)
def _process_records(self, examples):
  """Parse `tf.Example`s into `Tensors`."""
  return parsing_ops.parse_example(
      serialized=examples, features=self._features)
def _train_input_fn():
  feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
  features = linear_testing_utils.queue_parsed_features(feature_map)
  labels = features.pop('y')
  return features, labels
def _apply_transform(self, input_tensors):
  parsed_values = parsing_ops.parse_example(input_tensors[0],
                                            features=self._ordered_features)
  # pylint: disable=not-callable
  return self.return_type(**parsed_values)
def _read_keyed_batch_features_shared_queue(file_pattern, batch_size, features, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, reader_num_threads=1, feature_queue_capacity=100, num_queue_runners=2, parse_fn=None, name=None): """Adds operations to read, queue, batch and parse `Example` protos. Given file pattern (or list of files), will setup a shared queue for file names, setup a worker queue that gets filenames from the shared queue, read `Example` proto using provided `reader`, use batch queue to create batches of examples of size `batch_size` and parse example given `features` specification. All queue runners are added to the queue runners collection, and may be started via `start_queue_runners`. All ops are added to the default graph. Args: file_pattern: List of files or pattern of file paths containing `Example` records. See `tf.gfile.Glob` for pattern rules. batch_size: An int or scalar `Tensor` specifying the batch size to use. features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. reader: A function or class that returns an object with `read` method, (filename tensor) -> (example tensor). randomize_input: Whether the input should be randomized. num_epochs: Integer specifying the number of times to read through the dataset. If None, cycles through the dataset forever. NOTE - If specified, creates a variable that must be initialized, so call tf.initialize_local_variables() as shown in the tests. queue_capacity: Capacity for input queue. reader_num_threads: The number of threads to read examples. feature_queue_capacity: Capacity of the parsed features queue. num_queue_runners: Number of queue runners to start for the feature queue, Adding multiple queue runners for the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than parsing. parse_fn: Parsing function, takes `Example` Tensor returns parsed representation. If `None`, no parsing is done. name: Name of resulting op. Returns: Returns tuple of: - `Tensor` of string keys. - A dict of `Tensor` or `SparseTensor` objects for each in `features`. Raises: ValueError: for invalid inputs. """ with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope: keys, examples = _read_keyed_batch_examples_shared_queue( file_pattern, batch_size, reader, randomize_input=randomize_input, num_epochs=num_epochs, queue_capacity=queue_capacity, num_threads=reader_num_threads, read_batch_size=batch_size, parse_fn=parse_fn, name=scope) # Parse the example. feature_map = parsing_ops.parse_example(examples, features) return queue_parsed_features( feature_map, keys=keys, feature_queue_capacity=feature_queue_capacity, num_queue_runners=num_queue_runners, name=scope)
def _train_input_fn():
  features = parsing_ops.parse_example(serialized_examples, feature_spec)
  labels = features.pop('label')
  return features, labels
def _eval_input_fn():
  features = parsing_ops.parse_example(
      input_lib.limit_epochs(serialized_examples, num_epochs=1),
      feature_spec)
  labels = features.pop('label')
  return features, labels
def _predict_input_fn():
  features = parsing_ops.parse_example(
      input_lib.limit_epochs(serialized_examples, num_epochs=1),
      feature_spec)
  features.pop('label')
  return features, None
def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} placeholders[feature_keys.TrainEvalFeatures.TIMES] = ( array_ops.placeholder( name=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64, shape=[default_batch_size, default_series_length])) # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( array_ops.placeholder_with_default( name=feature_keys.TrainEvalFeatures.VALUES, input=array_ops.zeros(shape=[ default_batch_size if default_batch_size else 0, default_series_length if default_series_length else 0, self._model.num_features ], dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) if self._model.exogenous_feature_columns: with ops.Graph().as_default(): # Default placeholders have only an unknown batch dimension. Make them # in a separate graph, then splice in the series length to the shapes # and re-create them in the outer graph. parsed_features = (feature_column.make_parse_example_spec( self._model.exogenous_feature_columns)) placeholder_features = parsing_ops.parse_example( serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), features=parsed_features) exogenous_feature_shapes = { key: (value.get_shape(), value.dtype) for key, value in placeholder_features.items() } for feature_key, (batch_only_feature_shape, value_dtype) in ( exogenous_feature_shapes.items()): batch_only_feature_shape = ( batch_only_feature_shape.with_rank_at_least( 1).as_list()) feature_shape = ( [default_batch_size, default_series_length] + batch_only_feature_shape[1:]) placeholders[feature_key] = array_ops.placeholder( dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() model_start_state = self._model.get_start_state() for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary( model_start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size, )).concatenate( state_tensor.get_shape()) placeholders[prefixed_state_name] = array_ops.placeholder( name=prefixed_state_name, shape=state_shape_with_batch, dtype=state_tensor.dtype) return export_lib.ServingInputReceiver(placeholders, placeholders)
def _train_input_fn():
  feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
  features = _queue_parsed_features(feature_map)
  labels = features.pop('y')
  return features, labels
def _get_feature_ops_from_example(self, examples_batch):
  column_types = layers.create_feature_spec_for_parsing(
      (self._get_linear_feature_columns() or []) +
      (self._get_dnn_feature_columns() or []))
  features = parsing_ops.parse_example(examples_batch, column_types)
  return features
def _apply_transform(self, input_tensors, **kwargs):
  parsed_values = parsing_ops.parse_example(
      input_tensors[0], features=self._ordered_features)
  # pylint: disable=not-callable
  return self.return_type(**parsed_values)
def parse_feature_columns_from_examples(serialized, feature_columns, name=None, example_names=None): """Parses tf.Examples to extract tensors for given feature_columns. This is a wrapper of 'tf.io.parse_example'. Example: ```python columns_to_tensor = parse_feature_columns_from_examples( serialized=my_data, feature_columns=my_features) # Where my_features are: # Define features and transformations sparse_feature_a = sparse_column_with_keys( column_name="sparse_feature_a", keys=["AB", "CD", ...]) embedding_feature_a = embedding_column( sparse_id_column=sparse_feature_a, dimension=3, combiner="sum") sparse_feature_b = sparse_column_with_hash_bucket( column_name="sparse_feature_b", hash_bucket_size=1000) embedding_feature_b = embedding_column( sparse_id_column=sparse_feature_b, dimension=16, combiner="sum") crossed_feature_a_x_b = crossed_column( columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000) real_feature = real_valued_column("real_feature") real_feature_buckets = bucketized_column( source_column=real_feature, boundaries=[...]) my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a] ``` Args: serialized: A vector (1-D Tensor) of strings, a batch of binary serialized `Example` protos. feature_columns: An iterable containing all the feature columns. All items should be instances of classes derived from _FeatureColumn. name: A name for this operation (optional). example_names: A vector (1-D Tensor) of strings (optional), the names of the serialized protos in the batch. Returns: A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values. """ check_feature_columns(feature_columns) columns_to_tensors = parsing_ops.parse_example( serialized=serialized, features=fc.create_feature_spec_for_parsing(feature_columns), name=name, example_names=example_names) transformer = _Transformer(columns_to_tensors) for column in sorted(set(feature_columns), key=lambda x: x.key): transformer.transform(column) return columns_to_tensors
def make_batched_features_dataset(file_pattern, batch_size, features, reader=core_readers.TFRecordDataset, reader_args=None, num_epochs=None, shuffle=True, shuffle_buffer_size=10000, shuffle_seed=None, prefetch_buffer_size=1, reader_num_threads=1, parser_num_threads=2, sloppy_ordering=False, drop_final_batch=False): """Returns a `Dataset` of feature dictionaries from `Example` protos. Example: ``` serialized_examples = [ features { feature { key: "age" value { int64_list { value: [ 0 ] } } } feature { key: "gender" value { bytes_list { value: [ "f" ] } } } feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } } }, features { feature { key: "age" value { int64_list { value: [] } } } feature { key: "gender" value { bytes_list { value: [ "f" ] } } } feature { key: "kws" value { bytes_list { value: [ "sports" ] } } } } ] ``` We can use arguments: ``` features: { "age": FixedLenFeature([], dtype=tf.int64, default_value=-1), "gender": FixedLenFeature([], dtype=tf.string), "kws": VarLenFeature(dtype=tf.string), } ``` And the expected output is: ```python { "age": [[0], [-1]], "gender": [["f"], ["f"]], "kws": SparseTensor( indices=[[0, 0], [0, 1], [1, 0]], values=["code", "art", "sports"] dense_shape=[2, 2]), } ``` Args: file_pattern: List of files or patterns of file paths containing `Example` records. See `tf.gfile.Glob` for pattern rules. batch_size: An int representing the number of consecutive elements of this dataset to combine in a single batch. features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. See `tf.parse_example`. reader: A function or class that can be called with a `filenames` tensor and (optional) `reader_args` and returns a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`. reader_args: Additional arguments to pass to the reader class. num_epochs: Integer specifying the number of times to read through the dataset. If None, cycles through the dataset forever. Defaults to `None`. shuffle: A boolean, indicates whether the input should be shuffled. Defaults to `True`. shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity ensures better shuffling but would increase memory usage and startup time. shuffle_seed: Randomization seed to use for shuffling. prefetch_buffer_size: Number of feature batches to prefetch in order to improve performance. Recommended value is the number of batches consumed per training step (default is 1). reader_num_threads: Number of threads used to read `Example` records. If >1, the results will be interleaved. parser_num_threads: Number of threads to use for parsing `Example` tensors into a dictionary of `Feature` tensors. sloppy_ordering: If `True`, reading performance will be improved at the cost of non-deterministic ordering. If `False`, the order of elements produced is deterministic prior to shuffling (elements are still randomized if `shuffle=True`. Note that if the seed is set, then order of elements after shuffling is deterministic). Defaults to `False`. drop_final_batch: If `True`, and the batch size does not evenly divide the input dataset size, the final smaller batch will be dropped. Defaults to `False`. Returns: A dataset of `dict` elements. Each `dict` maps feature keys to `Tensor` or `SparseTensor` objects. 
""" # Create dataset of all matching filenames filenames = _get_file_names(file_pattern, False) dataset = dataset_ops.Dataset.from_tensor_slices(filenames) if shuffle: dataset = dataset.shuffle(len(filenames), shuffle_seed) # Read `Example` records from files as tensor objects. if reader_args is None: reader_args = [] # Read files sequentially (if reader_num_threads=1) or in parallel dataset = dataset.apply( interleave_ops.parallel_interleave( lambda filename: reader(filename, *reader_args), cycle_length=reader_num_threads, sloppy=sloppy_ordering)) # Extract values if the `Example` tensors are stored as key-value tuples. if dataset.output_types == (dtypes.string, dtypes.string): dataset = dataset.map(lambda _, v: v) # Apply dataset repeat and shuffle transformations. repeat_dataset = (num_epochs != 1) if repeat_dataset and shuffle: # Used fused shuffle_and_repeat operation for better performance dataset = dataset.apply( shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs, shuffle_seed)) elif repeat_dataset: dataset = dataset.repeat(num_epochs) elif shuffle: dataset = dataset.shuffle(shuffle_buffer_size, shuffle_seed) if drop_final_batch: dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size)) else: dataset = dataset.batch(batch_size) # Parse `Example` tensors to a dictionary of `Feature` tensors. dataset = dataset.map(lambda x: parsing_ops.parse_example(x, features), num_parallel_calls=parser_num_threads) # TODO(rachelim): Add an optional label_name argument for extracting the label # from the features dictionary, to comply with the type expected by the # input_fn to a `tf.Estimator.train` or `tf.Estimator.evaluate` function. dataset = dataset.prefetch(prefetch_buffer_size) return dataset