def make_parsing_export_strategy(feature_columns,
                                 default_output_alternative_key=None,
                                 assets_extra=None,
                                 as_text=False,
                                 exports_to_keep=5,
                                 target_core=False,
                                 strip_default_attrs=False):
  # pylint: disable=line-too-long
  """Build an ExportStrategy for Experiment from a set of `FeatureColumn`s.

  The SavedModel export produced by the strategy expects a single string
  Tensor holding serialized tf.Examples. At serving time those tf.Examples are
  parsed according to the parsing spec derived from `feature_columns`.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from ${tf.estimator.Estimator}
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel. Each key is a destination path
      (including the filename) relative to the assets.extra directory; the
      matching value is the full path of the source file to copy. Copying a
      single file without renaming it looks like
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep. Older exports will be
      garbage-collected. Defaults to 5. Set to None to disable garbage
      collection.
    target_core: If True, prepare an ExportStrategy for use with
      tensorflow.python.estimator.*. If False (default), prepare an
      ExportStrategy for use with tensorflow.contrib.learn.python.learn.*.
    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
      removed from the NodeDefs. For a detailed guide, see
      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  # pylint: enable=line-too-long
  parsing_spec = feature_column.create_feature_spec_for_parsing(feature_columns)

  # Pick the serving-input builder matching the targeted estimator family;
  # only the selected builder is looked up.
  if target_core:
    build_input_fn = core_export.build_parsing_serving_input_receiver_fn
  else:
    build_input_fn = input_fn_utils.build_parsing_serving_input_fn

  return make_export_strategy(
      build_input_fn(parsing_spec),
      default_output_alternative_key=default_output_alternative_key,
      assets_extra=assets_extra,
      as_text=as_text,
      exports_to_keep=exports_to_keep,
      strip_default_attrs=strip_default_attrs)
def make_parsing_export_strategy(feature_columns,
                                 default_output_alternative_key=None,
                                 assets_extra=None,
                                 as_text=False,
                                 exports_to_keep=5,
                                 target_core=False,
                                 strip_default_attrs=False):
  # pylint: disable=line-too-long
  """Create a tf.Example-parsing ExportStrategy for use with Experiment.

  The exported SavedModel is fed a single string Tensor of serialized
  tf.Examples, which are parsed at serving time according to the provided
  `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from ${tf.estimator.Estimator}
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel. Keys are destination paths (including
      the filename) relative to the assets.extra directory; values are the
      full paths of the source files to copy. For example, copying a single
      file without renaming it is specified as
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep. Older exports will be
      garbage-collected. Defaults to 5. Set to None to disable garbage
      collection.
    target_core: If True, prepare an ExportStrategy for use with
      tensorflow.python.estimator.*. If False (default), prepare an
      ExportStrategy for use with tensorflow.contrib.learn.python.learn.*.
    strip_default_attrs: Boolean. If `True`, default-valued attributes will be
      removed from the NodeDefs. For a detailed guide, see
      [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes).

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  # pylint: enable=line-too-long
  spec = feature_column.create_feature_spec_for_parsing(feature_columns)

  # The conditional expression evaluates only the branch that is selected.
  receiver_fn_builder = (
      core_export.build_parsing_serving_input_receiver_fn
      if target_core else input_fn_utils.build_parsing_serving_input_fn)
  receiver_fn = receiver_fn_builder(spec)

  return make_export_strategy(
      receiver_fn,
      default_output_alternative_key=default_output_alternative_key,
      assets_extra=assets_extra,
      as_text=as_text,
      exports_to_keep=exports_to_keep,
      strip_default_attrs=strip_default_attrs)
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Extracts tensors for `feature_columns` from serialized tf.Examples.

  This is a wrapper of 'tf.parse_example'.

  A typical usage is as follows:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys(column_name="native_country",
                                    keys=["US", "BRA", ...])
  country_emb = embedding_column(sparse_id_column=country, dimension=3,
                                 combiner="sum")
  occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
  occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                    combiner="sum")
  occupation_x_country = crossed_column(columns=[occupation, country],
                                        hash_bucket_size=10000)
  age = real_valued_column("age")
  age_buckets = bucketized_column(
      source_column=age,
      boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

  my_features = [occupation_emb, age_buckets, country_emb]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  check_feature_columns(feature_columns)
  parsing_spec = fc.create_feature_spec_for_parsing(feature_columns)
  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=parsing_spec,
      name=name,
      example_names=example_names)

  # Transform each distinct column once, in key order. The transformer is
  # built around `columns_to_tensors` and presumably records its results
  # there -- see `_Transformer` to confirm.
  transformer = _Transformer(columns_to_tensors)
  distinct_columns = set(feature_columns)
  for col in sorted(distinct_columns, key=lambda c: c.key):
    transformer.transform(col)
  return columns_to_tensors
def parse_feature_columns_from_sequence_examples(
    serialized,
    context_feature_columns,
    sequence_feature_columns,
    name=None,
    example_name=None):
  """Parses tf.SequenceExamples to extract tensors for given `FeatureColumn`s.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single serialized
      `SequenceExample` proto.
    context_feature_columns: An iterable containing the feature columns for
      context features. All items should be instances of classes derived from
      `_FeatureColumn`. Can be `None`.
    sequence_feature_columns: An iterable containing the feature columns for
      sequence features. All items should be instances of classes derived from
      `_FeatureColumn`. Can be `None`.
    name: A name for this operation (optional).
    example_name: A scalar (0-D Tensor) of type string (optional), the names of
      the serialized proto.

  Returns:
    A tuple consisting of (context_features, sequence_features)

    *  context_features: a dict mapping `FeatureColumns` from
        `context_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
    *  sequence_features: a dict mapping `FeatureColumns` from
        `sequence_feature_columns` to their parsed `Tensors`/`SparseTensor`s.

  Raises:
    ValueError: If `serialized` cannot be reshaped to a scalar, i.e. it does
      not hold exactly one sequence example.
  """
  # Sequence example parsing requires a single (scalar) example.
  try:
    serialized = array_ops.reshape(serialized, [])
  except ValueError as e:
    raise ValueError(
        'serialized must contain a single sequence example. Batching must be '
        'done after parsing for sequence examples. Error: {}'.format(e))

  # `None` means "no columns of this kind"; normalize to an empty list so the
  # validation and spec construction below can treat both inputs uniformly.
  if context_feature_columns is None:
    context_feature_columns = []
  if sequence_feature_columns is None:
    sequence_feature_columns = []

  check_feature_columns(context_feature_columns)
  context_feature_spec = fc.create_feature_spec_for_parsing(
      context_feature_columns)

  check_feature_columns(sequence_feature_columns)
  sequence_feature_spec = fc._create_sequence_feature_spec_for_parsing(  # pylint: disable=protected-access
      sequence_feature_columns, allow_missing_by_default=False)

  return parsing_ops.parse_single_sequence_example(serialized,
                                                   context_feature_spec,
                                                   sequence_feature_spec,
                                                   example_name,
                                                   name)
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses tf.Examples to extract tensors for given feature_columns.

  This is a wrapper of 'tf.parse_example'. A typical usage is as follows:

  ```
  columns_to_tensor = tf.contrib.layers.parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  country = sparse_column_with_keys("country", ["US", "BRA", ...])
  country_embedding = embedding_column(country, dimension=3, combiner="sum")
  query_word = sparse_column_with_hash_bucket(
      "query_word", hash_bucket_size=int(1e6))
  query_embedding = embedding_column(query_word, dimension=16, combiner="sum")
  age_bucket = bucketized_column(real_valued_column("age"),
                                 boundaries=[18+i*5 for i in range(10)])

  my_features = [query_embedding, age_bucket, country_embedding]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  # Run the shared column validation before parsing, as the other definitions
  # of this function in the file do. What exactly it checks is defined by
  # `check_feature_columns` itself.
  check_feature_columns(feature_columns)

  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)

  # Transform each distinct column once, in a deterministic (key-sorted) order.
  transformer = _Transformer(columns_to_tensors)
  for column in sorted(set(feature_columns), key=lambda x: x.key):
    transformer.transform(column)
  return columns_to_tensors
def testCreateFeatureSpec_ExperimentalColumns(self):
  """Checks the parsing spec generated for `_real_valued_var_len_column`s."""
  sparse_var_len_col = fc._real_valued_var_len_column(
      "real_valued_column0", is_sparse=True)
  dense_var_len_col = fc._real_valued_var_len_column(
      "real_valued_column1",
      dtype=dtypes.int64,
      default_value=0,
      is_sparse=False)

  # The sparse variant is expected to parse as a VarLenFeature, the dense one
  # as a FixedLenSequenceFeature carrying the column's default value.
  expected_spec = {
      "real_valued_column0":
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
      "real_valued_column1":
          parsing_ops.FixedLenSequenceFeature(
              [], dtype=dtypes.int64, allow_missing=True, default_value=0),
  }

  actual_spec = fc.create_feature_spec_for_parsing(
      {sparse_var_len_col, dense_var_len_col})
  self.assertDictEqual(expected_spec, actual_spec)
def _build_estimator_for_resource_export_test():
  """Fits a hash-table-backed Estimator and builds its serving input fn.

  Returns:
    A tuple `(est, serving_input_fn)`: the Estimator after one `fit` step on
    the iris data, and a parsing serving input fn built from a 4-dimensional
    real-valued 'feature' column.
  """

  def _input_fn():
    iris = base.load_iris()
    features = {
        'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
    }
    labels = constant_op.constant(iris.target, shape=[150], dtype=dtypes.int32)
    return features, labels

  feature_columns = [
      feature_column_lib.real_valued_column('feature', dimension=4)
  ]

  def resource_constant_model_fn(unused_features, unused_labels, mode):
    """A model_fn that loads a constant from a resource and serves it."""
    assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                    model_fn.ModeKeys.INFER)

    const = constant_op.constant(-1, dtype=dtypes.int64)
    table = lookup.MutableHashTable(
        dtypes.string, dtypes.int64, const, name='LookupTableModel')

    if mode == model_fn.ModeKeys.INFER:
      # Serving path: look the key up in the model table.
      lookup_key = constant_op.constant(['key'])
      return table.lookup(lookup_key), const, control_flow_ops.no_op()

    if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
      # Training/eval path: insert into the model table and into a second,
      # training-only table, grouping both inserts as the train op.
      insert_key = constant_op.constant(['key'])
      insert_value = constant_op.constant([42], dtype=dtypes.int64)
      model_insert = table.insert(insert_key, insert_value)
      training_state = lookup.MutableHashTable(
          dtypes.string,
          dtypes.int64,
          const,
          name='LookupTableTrainingState')
      state_insert = training_state.insert(insert_key, insert_value)
      return const, const, control_flow_ops.group(model_insert, state_insert)

  est = estimator.Estimator(model_fn=resource_constant_model_fn)
  est.fit(input_fn=_input_fn, steps=1)

  parsing_spec = feature_column_lib.create_feature_spec_for_parsing(
      feature_columns)
  return est, input_fn_utils.build_parsing_serving_input_fn(parsing_spec)
def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
  """Checks that column default values are carried into the parsing spec."""
  columns = [
      fc.real_valued_column("real_valued_column1", default_value=2),
      fc.real_valued_column("real_valued_column2", 5, default_value=4),
      fc.real_valued_column("real_valued_column3", default_value=[8]),
      fc.real_valued_column(
          "real_valued_column4", 3, default_value=[1, 0, 6]),
      fc._real_valued_var_len_column(
          "real_valued_column5", default_value=2, is_sparse=True),
      fc._real_valued_var_len_column(
          "real_valued_column6",
          dtype=dtypes.int64,
          default_value=1,
          is_sparse=False),
  ]

  # Expected: scalar defaults are given per dimension as floats for the fixed
  # length columns; the sparse var-len column maps to a VarLenFeature with no
  # default; the dense var-len column keeps its default.
  expected_spec = {
      "real_valued_column1":
          parsing_ops.FixedLenFeature(
              [1], dtype=dtypes.float32, default_value=[2.]),
      "real_valued_column2":
          parsing_ops.FixedLenFeature(
              [5], dtype=dtypes.float32,
              default_value=[4., 4., 4., 4., 4.]),
      "real_valued_column3":
          parsing_ops.FixedLenFeature(
              [1], dtype=dtypes.float32, default_value=[8.]),
      "real_valued_column4":
          parsing_ops.FixedLenFeature(
              [3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
      "real_valued_column5":
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
      "real_valued_column6":
          parsing_ops.FixedLenSequenceFeature(
              [], dtype=dtypes.int64, allow_missing=True, default_value=1),
  }

  spec = fc.create_feature_spec_for_parsing(columns)
  self.assertEqual(6, len(spec))
  self.assertDictEqual(expected_spec, spec)
def _build_estimator_for_resource_export_test():
  """Builds a fitted, hash-table-backed Estimator plus a serving input fn.

  Returns:
    A tuple `(est, serving_input_fn)`: the Estimator after one `fit` step on
    the iris data, and a parsing serving input fn derived from a
    4-dimensional real-valued 'feature' column.
  """

  def _input_fn():
    iris = base.load_iris()
    features = {
        'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
    }
    labels = constant_op.constant(iris.target, shape=[150], dtype=dtypes.int32)
    return features, labels

  feature_columns = [
      feature_column_lib.real_valued_column('feature', dimension=4)
  ]

  def resource_constant_model_fn(unused_features, unused_labels, mode):
    """A model_fn that loads a constant from a resource and serves it."""
    assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                    model_fn.ModeKeys.INFER)

    const = constant_op.constant(-1, dtype=dtypes.int64)
    table = lookup.MutableHashTable(
        dtypes.string, dtypes.int64, const, name='LookupTableModel')
    # Created for every mode; it serves as (part of) the returned train op.
    update_global_step = variables.get_global_step().assign_add(1)

    if mode == model_fn.ModeKeys.INFER:
      # Serving path: look the key up in the model table.
      lookup_key = constant_op.constant(['key'])
      return table.lookup(lookup_key), const, update_global_step

    if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
      # Training/eval path: insert into the model table and into a second,
      # training-only table, then group the inserts with the step update.
      insert_key = constant_op.constant(['key'])
      insert_value = constant_op.constant([42], dtype=dtypes.int64)
      model_insert = table.insert(insert_key, insert_value)
      training_state = lookup.MutableHashTable(
          dtypes.string,
          dtypes.int64,
          const,
          name='LookupTableTrainingState')
      state_insert = training_state.insert(insert_key, insert_value)
      train_op = control_flow_ops.group(model_insert, state_insert,
                                        update_global_step)
      return (const, const, train_op)

  est = estimator.Estimator(model_fn=resource_constant_model_fn)
  est.fit(input_fn=_input_fn, steps=1)

  parsing_spec = feature_column_lib.create_feature_spec_for_parsing(
      feature_columns)
  return est, input_fn_utils.build_parsing_serving_input_fn(parsing_spec)
def make_parsing_export_strategy(feature_columns,
                                 default_output_alternative_key=None,
                                 assets_extra=None,
                                 as_text=False,
                                 exports_to_keep=5):
  """Build an ExportStrategy for Experiment from a set of `FeatureColumn`s.

  The SavedModel export produced by the strategy expects a single string
  Tensor holding serialized tf.Examples. At serving time those tf.Examples are
  parsed according to the parsing spec derived from `feature_columns`.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from ${tf.estimator.Estimator}
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel. Each key is a destination path
      (including the filename) relative to the assets.extra directory; the
      matching value is the full path of the source file to copy. Copying a
      single file without renaming it looks like
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep. Older exports will be
      garbage-collected. Defaults to 5. Set to None to disable garbage
      collection.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  parsing_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
  parsing_input_fn = input_fn_utils.build_parsing_serving_input_fn(parsing_spec)

  # Export options are forwarded untouched to the generic strategy factory.
  export_options = dict(
      default_output_alternative_key=default_output_alternative_key,
      assets_extra=assets_extra,
      as_text=as_text,
      exports_to_keep=exports_to_keep)
  return make_export_strategy(parsing_input_fn, **export_options)
def make_parsing_export_strategy(feature_columns, exports_to_keep=5):
  """Build an ExportStrategy for Experiment from a set of `FeatureColumn`s.

  The SavedModel export produced by the strategy expects a single string
  Tensor holding serialized tf.Examples. At serving time those tf.Examples are
  parsed according to the parsing spec derived from `feature_columns`.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    exports_to_keep: Number of exports to keep. Older exports will be
      garbage-collected. Defaults to 5. Set to None to disable garbage
      collection.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  parsing_spec = feature_column.create_feature_spec_for_parsing(feature_columns)
  return make_export_strategy(
      input_fn_utils.build_parsing_serving_input_fn(parsing_spec),
      exports_to_keep=exports_to_keep)
def make_parsing_export_strategy(feature_columns,
                                 default_output_alternative_key=None,
                                 assets_extra=None,
                                 as_text=False,
                                 exports_to_keep=5):
  """Create a tf.Example-parsing ExportStrategy for use with Experiment.

  The exported SavedModel is fed a single string Tensor of serialized
  tf.Examples, which are parsed at serving time according to the provided
  `FeatureColumn`s.

  Args:
    feature_columns: An iterable of `FeatureColumn`s representing the features
      that must be provided at serving time (excluding labels!).
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from ${tf.estimator.Estimator}
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel. Keys are destination paths (including
      the filename) relative to the assets.extra directory; values are the
      full paths of the source files to copy. For example, copying a single
      file without renaming it is specified as
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep. Older exports will be
      garbage-collected. Defaults to 5. Set to None to disable garbage
      collection.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  # Feature columns -> parsing spec -> serving input fn, built inline.
  return make_export_strategy(
      input_fn_utils.build_parsing_serving_input_fn(
          feature_column.create_feature_spec_for_parsing(feature_columns)),
      default_output_alternative_key=default_output_alternative_key,
      assets_extra=assets_extra,
      as_text=as_text,
      exports_to_keep=exports_to_keep)
def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
  """Checks that column default values are carried into the parsing spec."""
  columns = [
      fc.real_valued_column("real_valued_column1", default_value=2),
      fc.real_valued_column("real_valued_column2", 5, default_value=4),
      fc.real_valued_column("real_valued_column3", default_value=[8]),
      fc.real_valued_column(
          "real_valued_column4", 3, default_value=[1, 0, 6]),
      fc.real_valued_column(
          "real_valued_column5", dimension=None, default_value=2),
  ]

  # Expected: scalar defaults are given per dimension as floats; the column
  # with `dimension=None` maps to a VarLenFeature with no default.
  expected_spec = {
      "real_valued_column1":
          parsing_ops.FixedLenFeature(
              [1], dtype=dtypes.float32, default_value=[2.]),
      "real_valued_column2":
          parsing_ops.FixedLenFeature(
              [5], dtype=dtypes.float32,
              default_value=[4., 4., 4., 4., 4.]),
      "real_valued_column3":
          parsing_ops.FixedLenFeature(
              [1], dtype=dtypes.float32, default_value=[8.]),
      "real_valued_column4":
          parsing_ops.FixedLenFeature(
              [3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
      "real_valued_column5":
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
  }

  spec = fc.create_feature_spec_for_parsing(columns)
  self.assertEqual(5, len(spec))
  self.assertDictEqual(expected_spec, spec)
def _build_estimator_for_export_tests(tmpdir):
  """Fits a LinearRegressor on iris and builds an asset-using serving input fn.

  Args:
    tmpdir: Directory in which the vocabulary asset file 'my_vocab_file' is
      written when the returned serving input fn is called.

  Returns:
    A tuple `(est, serving_input_fn_with_asset)`: the regressor after 20 `fit`
    steps, and a serving input fn that wraps the standard parsing serving
    input fn and adds a 'bogus_lookup' feature backed by a file-initialized
    hash table.
  """

  def _input_fn():
    iris = base.load_iris()
    return {
        'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
    }, constant_op.constant(
        iris.target, shape=[150], dtype=dtypes.int32)

  feature_columns = [
      feature_column_lib.real_valued_column('feature', dimension=4)
  ]

  est = linear.LinearRegressor(feature_columns)
  est.fit(input_fn=_input_fn, steps=20)

  feature_spec = feature_column_lib.create_feature_spec_for_parsing(
      feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec)

  # hack in an op that uses an asset, in order to test asset export.
  # this is not actually valid, of course.
  def serving_input_fn_with_asset():
    features, labels, inputs = serving_input_fn()

    vocab_file_name = os.path.join(tmpdir, 'my_vocab_file')
    # Use a context manager so the file handle is closed even when the write
    # raises.
    with gfile.GFile(vocab_file_name, mode='w') as vocab_file:
      vocab_file.write(VOCAB_FILE_CONTENT)

    hashtable = lookup.HashTable(
        lookup.TextFileStringTableInitializer(vocab_file_name), 'x')
    features['bogus_lookup'] = hashtable.lookup(
        math_ops.to_int64(features['feature']))

    return input_fn_utils.InputFnOps(features, labels, inputs)

  return est, serving_input_fn_with_asset
def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
  """Verifies the parsing spec of real-valued columns that carry defaults."""
  scalar_default_col = fc.real_valued_column(
      "real_valued_column1", default_value=2)
  broadcast_default_col = fc.real_valued_column(
      "real_valued_column2", 5, default_value=4)
  single_item_list_col = fc.real_valued_column(
      "real_valued_column3", default_value=[8])
  full_list_col = fc.real_valued_column(
      "real_valued_column4", 3, default_value=[1, 0, 6])
  var_len_col = fc.real_valued_column(
      "real_valued_column5", dimension=None, default_value=2)

  spec = fc.create_feature_spec_for_parsing([
      scalar_default_col,
      broadcast_default_col,
      single_item_list_col,
      full_list_col,
      var_len_col,
  ])
  self.assertEqual(5, len(spec))

  # Fixed-length columns keep their defaults (as floats, one per dimension);
  # the `dimension=None` column is expected as a VarLenFeature.
  expected_spec = {
      "real_valued_column1":
          parsing_ops.FixedLenFeature(
              [1], dtype=dtypes.float32, default_value=[2.]),
      "real_valued_column2":
          parsing_ops.FixedLenFeature(
              [5], dtype=dtypes.float32,
              default_value=[4., 4., 4., 4., 4.]),
      "real_valued_column3":
          parsing_ops.FixedLenFeature(
              [1], dtype=dtypes.float32, default_value=[8.]),
      "real_valued_column4":
          parsing_ops.FixedLenFeature(
              [3], dtype=dtypes.float32, default_value=[1., 0., 6.]),
      "real_valued_column5":
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
  }
  self.assertDictEqual(expected_spec, spec)
def testCreateFeatureSpec(self):
  """Checks the parsing spec built from a mix of feature column types."""
  hashed_col = fc.sparse_column_with_hash_bucket(
      "sparse_column", hash_bucket_size=100)
  embedding_source = fc.sparse_column_with_hash_bucket(
      "sparse_column_for_embedding", hash_bucket_size=10)
  embedded_col = fc.embedding_column(embedding_source, dimension=4)
  keyed_col = fc.sparse_column_with_keys(
      "id_column", ["marlo", "omar", "stringer"])
  weighted_col = fc.weighted_sparse_column(keyed_col, "id_weights_column")
  real_col_dim1 = fc.real_valued_column("real_valued_column1")
  real_col_dim5 = fc.real_valued_column("real_valued_column2", 5)
  real_col_var_len = fc.real_valued_column(
      "real_valued_column3", dimension=None)
  bucketized_dim1 = fc.bucketized_column(
      fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
  bucketized_dim4 = fc.bucketized_column(
      fc.real_valued_column("real_valued_column_for_bucketization2", 4),
      [0, 4])
  cross_left = fc.sparse_column_with_hash_bucket(
      "cross_aaa", hash_bucket_size=100)
  cross_right = fc.sparse_column_with_hash_bucket(
      "cross_bbb", hash_bucket_size=100)
  crossed_col = fc.crossed_column(
      {cross_left, cross_right}, hash_bucket_size=10000)

  columns = {
      hashed_col, embedded_col, weighted_col, real_col_dim1, real_col_dim5,
      real_col_var_len, bucketized_dim1, bucketized_dim4, crossed_col
  }

  # The spec is keyed by the underlying raw feature names, so wrapped columns
  # (embedding, weighted, bucketized, crossed) show up via their sources.
  string_var_len = parsing_ops.VarLenFeature(dtypes.string)
  expected_spec = {
      "sparse_column": string_var_len,
      "sparse_column_for_embedding": string_var_len,
      "id_column": string_var_len,
      "id_weights_column": parsing_ops.VarLenFeature(dtypes.float32),
      "real_valued_column1":
          parsing_ops.FixedLenFeature([1], dtype=dtypes.float32),
      "real_valued_column2":
          parsing_ops.FixedLenFeature([5], dtype=dtypes.float32),
      "real_valued_column3":
          parsing_ops.VarLenFeature(dtype=dtypes.float32),
      "real_valued_column_for_bucketization1":
          parsing_ops.FixedLenFeature([1], dtype=dtypes.float32),
      "real_valued_column_for_bucketization2":
          parsing_ops.FixedLenFeature([4], dtype=dtypes.float32),
      "cross_aaa": string_var_len,
      "cross_bbb": string_var_len,
  }

  spec_from_set = fc.create_feature_spec_for_parsing(columns)
  self.assertDictEqual(expected_spec, spec_from_set)

  # Test that the same config is parsed out if we pass a dictionary.
  columns_by_index = dict(
      (str(index), column) for index, column in enumerate(columns))
  spec_from_dict = fc.create_feature_spec_for_parsing(columns_by_index)
  self.assertDictEqual(expected_spec, spec_from_dict)
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Extracts tensors for `feature_columns` from serialized tf.Examples.

  This is a wrapper of 'tf.io.parse_example'.

  Example:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  sparse_feature_a = sparse_column_with_keys(
      column_name="sparse_feature_a", keys=["AB", "CD", ...])

  embedding_feature_a = embedding_column(
      sparse_id_column=sparse_feature_a, dimension=3, combiner="sum")

  sparse_feature_b = sparse_column_with_hash_bucket(
      column_name="sparse_feature_b", hash_bucket_size=1000)

  embedding_feature_b = embedding_column(
      sparse_id_column=sparse_feature_b, dimension=16, combiner="sum")

  crossed_feature_a_x_b = crossed_column(
      columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000)

  real_feature = real_valued_column("real_feature")
  real_feature_buckets = bucketized_column(
      source_column=real_feature, boundaries=[...])

  my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  check_feature_columns(feature_columns)
  parsing_spec = fc.create_feature_spec_for_parsing(feature_columns)
  columns_to_tensors = parsing_ops.parse_example(
      serialized=serialized,
      features=parsing_spec,
      name=name,
      example_names=example_names)

  # Transform each distinct column once, in key order. The transformer is
  # built around `columns_to_tensors` and presumably records its results
  # there -- see `_Transformer` to confirm.
  transformer = _Transformer(columns_to_tensors)
  distinct_columns = set(feature_columns)
  for col in sorted(distinct_columns, key=lambda c: c.key):
    transformer.transform(col)
  return columns_to_tensors
def parse_feature_columns_from_examples(serialized,
                                        feature_columns,
                                        name=None,
                                        example_names=None):
  """Parses a batch of tf.Examples into tensors for the given columns.

  This is a wrapper of 'tf.parse_example'.

  Example:

  ```python
  columns_to_tensor = parse_feature_columns_from_examples(
      serialized=my_data,
      feature_columns=my_features)

  # Where my_features are:
  # Define features and transformations
  sparse_feature_a = sparse_column_with_keys(
      column_name="sparse_feature_a", keys=["AB", "CD", ...])

  embedding_feature_a = embedding_column(
      sparse_id_column=sparse_feature_a, dimension=3, combiner="sum")

  sparse_feature_b = sparse_column_with_hash_bucket(
      column_name="sparse_feature_b", hash_bucket_size=1000)

  embedding_feature_b = embedding_column(
      sparse_id_column=sparse_feature_b, dimension=16, combiner="sum")

  crossed_feature_a_x_b = crossed_column(
      columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000)

  real_feature = real_valued_column("real_feature")
  real_feature_buckets = bucketized_column(
      source_column=real_feature, boundaries=[...])

  my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a]
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    feature_columns: An iterable containing all the feature columns. All items
      should be instances of classes derived from _FeatureColumn.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping FeatureColumn to `Tensor` and `SparseTensor` values.
  """
  check_feature_columns(feature_columns)

  parse_kwargs = dict(
      serialized=serialized,
      features=fc.create_feature_spec_for_parsing(feature_columns),
      name=name,
      example_names=example_names)
  columns_to_tensors = parsing_ops.parse_example(**parse_kwargs)

  # De-duplicate, then visit columns in key order so each one is transformed
  # exactly once and in a deterministic sequence.
  transformer = _Transformer(columns_to_tensors)
  ordered_columns = sorted(set(feature_columns), key=lambda col: col.key)
  for feature_col in ordered_columns:
    transformer.transform(feature_col)
  return columns_to_tensors
def testCreateFeatureSpec(self):
  """Checks the parsing spec generated for a mix of feature columns.

  Builds sparse, embedding, weighted, real-valued, bucketized and crossed
  columns, then asserts that `fc.create_feature_spec_for_parsing` returns the
  expected spec both for a set of columns and for a dict whose values are
  those columns.
  """

  def var_len(dtype):
    # Spec for a variable-length feature of the given dtype.
    return parsing_ops.VarLenFeature(dtype)

  def fixed_len_float(width):
    # Spec for a fixed-length float32 feature with `width` values.
    return parsing_ops.FixedLenFeature([width], dtype=dtypes.float32)

  hashed_sparse = fc.sparse_column_with_hash_bucket(
      "sparse_column", hash_bucket_size=100)
  embedding_source = fc.sparse_column_with_hash_bucket(
      "sparse_column_for_embedding", hash_bucket_size=10)
  embedded = fc.embedding_column(embedding_source, dimension=4)
  keyed_ids = fc.sparse_column_with_keys(
      "id_column", ["marlo", "omar", "stringer"])
  weighted_ids = fc.weighted_sparse_column(keyed_ids, "id_weights_column")
  real_1 = fc.real_valued_column("real_valued_column1")
  real_2 = fc.real_valued_column("real_valued_column2", 5)
  real_3 = fc.real_valued_column("real_valued_column3", dimension=None)
  bucketized_1 = fc.bucketized_column(
      fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
  bucketized_2 = fc.bucketized_column(
      fc.real_valued_column("real_valued_column_for_bucketization2", 4),
      [0, 4])
  cross_left = fc.sparse_column_with_hash_bucket(
      "cross_aaa", hash_bucket_size=100)
  cross_right = fc.sparse_column_with_hash_bucket(
      "cross_bbb", hash_bucket_size=100)
  crossed = fc.crossed_column(
      {cross_left, cross_right}, hash_bucket_size=10000)

  feature_columns = {
      hashed_sparse, embedded, weighted_ids, real_1, real_2, real_3,
      bucketized_1, bucketized_2, crossed
  }

  expected_config = {
      "sparse_column": var_len(dtypes.string),
      "sparse_column_for_embedding": var_len(dtypes.string),
      "id_column": var_len(dtypes.string),
      "id_weights_column": var_len(dtypes.float32),
      "real_valued_column1": fixed_len_float(1),
      "real_valued_column2": fixed_len_float(5),
      "real_valued_column3": var_len(dtypes.float32),
      "real_valued_column_for_bucketization1": fixed_len_float(1),
      "real_valued_column_for_bucketization2": fixed_len_float(4),
      "cross_aaa": var_len(dtypes.string),
      "cross_bbb": var_len(dtypes.string),
  }

  spec_from_set = fc.create_feature_spec_for_parsing(feature_columns)
  self.assertDictEqual(expected_config, spec_from_set)

  # Passing the columns as dict values must yield the same spec.
  columns_by_name = {
      str(index): column for index, column in enumerate(feature_columns)
  }
  spec_from_dict = fc.create_feature_spec_for_parsing(columns_by_name)
  self.assertDictEqual(expected_config, spec_from_dict)
def testCreateFeatureSpec(self):
  """Checks the parsing spec for contrib columns, including the core library.

  Builds sparse (string/int32/int64 keyed), embedding, weighted, real-valued,
  bucketized, crossed, one-hot and scattered-embedding columns. The expected
  spec is compared against `fc.create_feature_spec_for_parsing` (set and dict
  input) and against `fc_core.make_parse_example_spec`.
  """
  string_var_len = parsing_ops.VarLenFeature(dtypes.string)

  def float_fixed_len(width):
    # Spec for a fixed-length float32 feature with `width` values.
    return parsing_ops.FixedLenFeature([width], dtype=dtypes.float32)

  hashed_sparse = fc.sparse_column_with_hash_bucket(
      "sparse_column", hash_bucket_size=100)
  embedded = fc.embedding_column(
      fc.sparse_column_with_hash_bucket(
          "sparse_column_for_embedding", hash_bucket_size=10),
      dimension=4)
  str_ids = fc.sparse_column_with_keys(
      "str_id_column", ["marlo", "omar", "stringer"])
  int32_ids = fc.sparse_column_with_keys(
      "int32_id_column", [42, 1, -1000], dtype=dtypes.int32)
  int64_ids = fc.sparse_column_with_keys(
      "int64_id_column", [42, 1, -1000], dtype=dtypes.int64)
  weighted_ids = fc.weighted_sparse_column(str_ids, "str_id_weights_column")
  real_1 = fc.real_valued_column("real_valued_column1")
  real_2 = fc.real_valued_column("real_valued_column2", 5)
  bucketized_1 = fc.bucketized_column(
      fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
  bucketized_2 = fc.bucketized_column(
      fc.real_valued_column("real_valued_column_for_bucketization2", 4),
      [0, 4])
  cross_left = fc.sparse_column_with_hash_bucket(
      "cross_aaa", hash_bucket_size=100)
  cross_right = fc.sparse_column_with_hash_bucket(
      "cross_bbb", hash_bucket_size=100)
  crossed = fc.crossed_column(
      {cross_left, cross_right}, hash_bucket_size=10000)
  one_hot = fc.one_hot_column(
      fc.sparse_column_with_hash_bucket(
          "sparse_column_for_one_hot", hash_bucket_size=100))
  scattered = fc.scattered_embedding_column(
      "scattered_embedding_column", size=100, dimension=10, hash_key=1)

  feature_columns = {
      hashed_sparse, embedded, weighted_ids, int32_ids, int64_ids, real_1,
      real_2, bucketized_1, bucketized_2, crossed, one_hot, scattered
  }

  expected_config = {
      "sparse_column": string_var_len,
      "sparse_column_for_embedding": string_var_len,
      "str_id_column": string_var_len,
      "int32_id_column": parsing_ops.VarLenFeature(dtypes.int32),
      "int64_id_column": parsing_ops.VarLenFeature(dtypes.int64),
      "str_id_weights_column": parsing_ops.VarLenFeature(dtypes.float32),
      "real_valued_column1": float_fixed_len(1),
      "real_valued_column2": float_fixed_len(5),
      "real_valued_column_for_bucketization1": float_fixed_len(1),
      "real_valued_column_for_bucketization2": float_fixed_len(4),
      "cross_aaa": string_var_len,
      "cross_bbb": string_var_len,
      "sparse_column_for_one_hot": string_var_len,
      "scattered_embedding_column": string_var_len,
  }

  contrib_spec = fc.create_feature_spec_for_parsing(feature_columns)
  self.assertDictEqual(expected_config, contrib_spec)

  # Contrib feature columns must also be accepted by the core library.
  core_spec = fc_core.make_parse_example_spec(feature_columns)
  self.assertDictEqual(expected_config, core_spec)

  # The same spec must come out when the columns are supplied as a dict.
  columns_by_name = dict(
      (str(index), column) for index, column in enumerate(feature_columns))
  dict_spec = fc.create_feature_spec_for_parsing(columns_by_name)
  self.assertDictEqual(expected_config, dict_spec)
def testCreateFeatureSpec(self):
  """Checks the parsing spec, including variable-length real-valued columns.

  Besides sparse, embedding, weighted, fixed-length real-valued, bucketized,
  crossed, one-hot and scattered-embedding columns, this covers
  `fc._real_valued_var_len_column` in both its sparse and non-sparse forms.
  The expected spec is compared against `fc.create_feature_spec_for_parsing`
  for a set of columns and for a dict whose values are those columns.
  """
  string_var_len = parsing_ops.VarLenFeature(dtypes.string)

  def float_fixed_len(width):
    # Spec for a fixed-length float32 feature with `width` values.
    return parsing_ops.FixedLenFeature([width], dtype=dtypes.float32)

  hashed_sparse = fc.sparse_column_with_hash_bucket(
      "sparse_column", hash_bucket_size=100)
  embedded = fc.embedding_column(
      fc.sparse_column_with_hash_bucket(
          "sparse_column_for_embedding", hash_bucket_size=10),
      dimension=4)
  str_ids = fc.sparse_column_with_keys(
      "str_id_column", ["marlo", "omar", "stringer"])
  int32_ids = fc.sparse_column_with_keys(
      "int32_id_column", [42, 1, -1000], dtype=dtypes.int32)
  int64_ids = fc.sparse_column_with_keys(
      "int64_id_column", [42, 1, -1000], dtype=dtypes.int64)
  weighted_ids = fc.weighted_sparse_column(str_ids, "str_id_weights_column")
  real_1 = fc.real_valued_column("real_valued_column1")
  real_2 = fc.real_valued_column("real_valued_column2", 5)
  real_var_len_sparse = fc._real_valued_var_len_column(
      "real_valued_column3", is_sparse=True)
  real_var_len_dense = fc._real_valued_var_len_column(
      "real_valued_column4",
      dtype=dtypes.int64,
      default_value=0,
      is_sparse=False)
  bucketized_1 = fc.bucketized_column(
      fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
  bucketized_2 = fc.bucketized_column(
      fc.real_valued_column("real_valued_column_for_bucketization2", 4),
      [0, 4])
  cross_left = fc.sparse_column_with_hash_bucket(
      "cross_aaa", hash_bucket_size=100)
  cross_right = fc.sparse_column_with_hash_bucket(
      "cross_bbb", hash_bucket_size=100)
  crossed = fc.crossed_column(
      {cross_left, cross_right}, hash_bucket_size=10000)
  one_hot = fc.one_hot_column(
      fc.sparse_column_with_hash_bucket(
          "sparse_column_for_one_hot", hash_bucket_size=100))
  scattered = fc.scattered_embedding_column(
      "scattered_embedding_column", size=100, dimension=10, hash_key=1)

  feature_columns = {
      hashed_sparse, embedded, weighted_ids, int32_ids, int64_ids, real_1,
      real_2, real_var_len_sparse, real_var_len_dense, bucketized_1,
      bucketized_2, crossed, one_hot, scattered
  }

  expected_config = {
      "sparse_column": string_var_len,
      "sparse_column_for_embedding": string_var_len,
      "str_id_column": string_var_len,
      "int32_id_column": parsing_ops.VarLenFeature(dtypes.int32),
      "int64_id_column": parsing_ops.VarLenFeature(dtypes.int64),
      "str_id_weights_column": parsing_ops.VarLenFeature(dtypes.float32),
      "real_valued_column1": float_fixed_len(1),
      "real_valued_column2": float_fixed_len(5),
      "real_valued_column3": parsing_ops.VarLenFeature(dtype=dtypes.float32),
      "real_valued_column4": parsing_ops.FixedLenSequenceFeature(
          [], dtype=dtypes.int64, allow_missing=True, default_value=0),
      "real_valued_column_for_bucketization1": float_fixed_len(1),
      "real_valued_column_for_bucketization2": float_fixed_len(4),
      "cross_aaa": string_var_len,
      "cross_bbb": string_var_len,
      "sparse_column_for_one_hot": string_var_len,
      "scattered_embedding_column": string_var_len,
  }

  spec_from_set = fc.create_feature_spec_for_parsing(feature_columns)
  self.assertDictEqual(expected_config, spec_from_set)

  # Supplying the columns as dict values must produce the same spec.
  columns_by_name = {}
  for index, column in enumerate(feature_columns):
    columns_by_name[str(index)] = column
  spec_from_dict = fc.create_feature_spec_for_parsing(columns_by_name)
  self.assertDictEqual(expected_config, spec_from_dict)