def text_embedding_column(key, module_spec, trainable=False):
    """Builds a dense feature column that embeds a text feature with a Module.

    The column is meant for an input feature whose values are strings of
    arbitrary size. Its output is whatever the module `m`, instantiated from
    `module_spec`, produces for that input: `result = m(input)`. That `result`
    has to be float32 with shape `[batch_size, num_features]`, where
    `num_features` is a known value.

    Example:

    ```python
      comment = text_embedding_column("comment", "/tmp/text-module")
      feature_columns = [comment, ...]
      ...
      features = {
        "comment": np.array(["wow, much amazing", "so easy", ...]),
        ...
      }
      labels = np.array([[1], [0], ...])
      # If running TF 2.x, use `tf.compat.v1.estimator.inputs.numpy_input_fn`
      input_fn = tf.estimator.inputs.numpy_input_fn(features, labels,
                                                    shuffle=True)
      estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns)
      estimator.train(input_fn, max_steps=100)
    ```

    Args:
      key: A string or `_FeatureColumn` identifying the text feature.
      module_spec: Either a ModuleSpec describing the Module to instantiate,
        or a path from which a ModuleSpec can be loaded via
        `load_module_spec`.
      trainable: Whether the Module is trainable. Defaults to False, which
        keeps the pre-trained weights frozen. Note that this differs from the
        ordinary tf.feature_column.embedding_column(), which is intended for
        training from scratch.

    Returns:
      `_DenseColumn` that converts from text input.

    Raises:
      ValueError: if module_spec is not suitable for use in this feature
        column.
    """
    # Normalize the argument (spec object or path) into a spec before it is
    # validated and handed to the column.
    resolved_spec = module.as_module_spec(module_spec)
    _check_module_is_text_embedding(resolved_spec)
    return _TextEmbeddingColumn(
        key=key,
        module_spec=resolved_spec,
        trainable=trainable,
    )
def text_embedding_column(key, module_spec, trainable=False):
    """Uses a Module to construct a dense representation from a text feature.

    This feature column can be used on an input feature whose values are
    strings of arbitrary size.

    The result of this feature column is the result of passing its `input`
    through the module `m` instantiated from `module_spec`, as per
    `result = m(input)`. The `result` must have dtype float32 and shape
    `[batch_size, num_features]` with a known value of num_features.

    Example:

    ```python
      comment = text_embedding_column("comment", "/tmp/text-module")
      feature_columns = [comment, ...]
      ...
      features = {
        "comment": np.array(["wow, much amazing", "so easy", ...]),
        ...
      }
      labels = np.array([[1], [0], ...])
      # If running TF 2.x, use `tf.compat.v1.estimator.inputs.numpy_input_fn`
      input_fn = tf.estimator.inputs.numpy_input_fn(features, labels,
                                                    shuffle=True)
      estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns)
      estimator.train(input_fn, max_steps=100)
    ```

    Args:
      key: A string or `_FeatureColumn` identifying the text feature.
      module_spec: A ModuleSpec defining the Module to instantiate or a path
        where to load a ModuleSpec via `load_module_spec`.
      trainable: Whether or not the Module is trainable. False by default,
        meaning the pre-trained weights are frozen. This is different from the
        ordinary tf.feature_column.embedding_column(), but that one is
        intended for training from scratch.

    Returns:
      `_DenseColumn` that converts from text input.

    Raises:
      ValueError: if module_spec is not suitable for use in this feature
        column.
    """
    # Accept either a spec object or a path; validate before building the
    # column so an unsuitable module is rejected up front.
    module_spec = module.as_module_spec(module_spec)
    _check_module_is_text_embedding(module_spec)
    return _TextEmbeddingColumn(
        key=key, module_spec=module_spec, trainable=trainable)
def sequence_text_embedding_column(key, module_spec, trainable=False):
    """Variant of tensorflow_hub.text_embedding_column for sequential input.

    The spec is resolved with `module.as_module_spec`, validated with
    `_check_module_is_sequence_text_embedding`, and then wrapped in a
    `_TextEmbeddingColumn` that is asked to use the 'sequence' signature.

    Args:
      key: Forwarded unchanged as the column's `key`.
      module_spec: Anything `module.as_module_spec` accepts; the resolved spec
        is what the column receives.
      trainable: Forwarded unchanged as the column's `trainable` flag. False
        by default.

    Returns:
      `_SequenceDenseColumn` that converts from text input.

    Raises:
      ValueError: if module_spec is not suitable for use in this feature
        column.
    """
    resolved_spec = module.as_module_spec(module_spec)
    _check_module_is_sequence_text_embedding(resolved_spec)
    column_kwargs = dict(
        key=key,
        module_spec=resolved_spec,
        trainable=trainable,
        signature='sequence',
    )
    return _TextEmbeddingColumn(**column_kwargs)
def image_embedding_column(key, module_spec):
    """Builds a dense 1-D feature column from image pixels using a Module.

    The column operates on images given as float32 tensors of RGB pixel data
    in the range [0,1]. Such data can come from a numeric_column() when the
    tf.Example input already holds decoded images that all share the shape
    [height, width, 3]. More commonly, the input_fn explicitly decodes the
    images, resizes them (possibly after data augmentation such as random
    crops), and provides a batch of shape [batch_size, height, width, 3].

    The column's output is what the module `m`, instantiated from
    `module_spec`, returns for that input: `result = m({"images": input})`.
    The `result` must be float32 with shape `[batch_size, num_features]`,
    where num_features is a known value.

    Example:

    ```python
      image_column = hub.image_embedding_column("embeddings",
                                                "/tmp/image-module")
      feature_columns = [image_column, ...]
      estimator = tf.estimator.LinearClassifier(feature_columns, ...)
      height, width = hub.get_expected_image_size(image_column.module_spec)
      input_fn = ...  # Provides "embeddings" with shape [None, height, width, 3].
      estimator.train(input_fn, ...)
    ```

    Args:
      key: A string or `_FeatureColumn` identifying the input image data.
      module_spec: A string handle or a `ModuleSpec` identifying the module.

    Returns:
      `_DenseColumn` that converts from pixel data.

    Raises:
      ValueError: if module_spec is not suitable for use in this feature
        column.
    """
    # Resolve handle-or-spec into a spec, then validate before constructing.
    resolved_spec = module.as_module_spec(module_spec)
    _check_module_is_image_embedding(resolved_spec)
    return _ImageEmbeddingColumn(
        key=key,
        module_spec=resolved_spec,
    )
def sparse_text_embedding_column(key, module_spec, combiner, default_value,
                                 trainable=False):
    """Uses a Module to construct dense representations from sparse text features.

    TODO(b/131678043): This does not work yet with TF2.

    The input to this feature column is a batch of multiple strings with
    arbitrary size, assuming the input is a SparseTensor.

    This type of feature column is typically suited for modules that operate
    on pre-tokenized text to produce token level embeddings which are combined
    with the combiner into a text embedding. The combiner always treats the
    tokens as a bag of words rather than a sequence.

    The output (i.e., transformed input layer) is a DenseTensor, with shape
    [batch_size, num_embedding_dim].

    For Example:

    ```python
      comment = hub.sparse_text_embedding_column(
          "comment", "/tmp/text_module", combiner="mean", default_value=None)
      feature_columns = [comment, ...]
      ...
      features = {
        "comment": tf.SparseTensor(indices=[[0, 0], [1, 2]],
                                   values=['sparse', 'embedding'],
                                   dense_shape=[3, 4]),
        ...
      }
      estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns)
    ```

    Args:
      key: A string or `_FeatureColumn` identifying the text feature.
      module_spec: A string handle or a `_ModuleSpec` identifying the module.
      combiner: a string specifying reducing op for embeddings in the same
        Example. Currently, 'mean', 'sqrtn', 'sum' are supported. Using
        combiner=None is undefined.
      default_value: default value for Examples where the text feature is
        empty. Note, it's recommended to have default_value consistent with
        OOV tokens, in case there was special handling of OOV in the text
        module. If None, the text feature is assumed to be non-empty for each
        Example.
      trainable: Whether or not the Module is trainable. False by default,
        meaning the pre-trained weights are frozen. This is different from the
        ordinary tf.feature_column.embedding_column(), but that one is
        intended for training from scratch.

    Returns:
      `_DenseColumn` that converts from text input.

    Raises:
      ValueError: if module_spec is not suitable for use in this feature
        column.
      ValueError: if combiner not in ('mean', 'sqrtn', 'sum').
    """
    module_spec = module.as_module_spec(module_spec)
    _check_module_is_text_embedding(module_spec)
    if combiner not in ("mean", "sqrtn", "sum"):
        # Wrap the value in a 1-tuple: with a bare `% combiner`, a tuple-valued
        # combiner would be unpacked as several format arguments and raise
        # TypeError (or be silently unwrapped) instead of the documented
        # ValueError.
        raise ValueError(
            "combiner must be 'mean', 'sqrtn' or 'sum': %r" % (combiner,))
    return _SparseTextEmbeddingColumn(
        key=key,
        module_spec=module_spec,
        trainable=trainable,
        default_value=default_value,
        combiner=combiner)
def __init__(self, key, module_spec_path):
    """Resolves and validates the image module spec for this column.

    Stores the spec returned by `module.as_module_spec` on
    `self.module_spec`, checks it with `_check_module_is_image_embedding`,
    and then runs the parent initializer with no arguments.

    Args:
      key: Not used in this body.
      module_spec_path: Not used directly in this body; the value is read
        from `self.module_spec_path` instead.
    """
    # NOTE(review): the path is read from the instance, not from the argument.
    # Presumably the attribute is already populated before __init__ runs
    # (e.g. by a tuple-like base class's __new__) -- confirm against the class
    # definition.
    resolved_spec = module.as_module_spec(self.module_spec_path)
    self.module_spec = resolved_spec
    _check_module_is_image_embedding(resolved_spec)
    super(_ImageEmbeddingColumn, self).__init__()
def __init__(self, key, module_spec_path, trainable):
    """Resolves and validates the text module spec for this column.

    Stores the spec returned by `module.as_module_spec` on
    `self.module_spec`, checks it with `_check_module_is_text_embedding`,
    and then runs the parent initializer with no arguments.

    Args:
      key: Not used in this body.
      module_spec_path: Not used directly in this body; the value is read
        from `self.module_spec_path` instead.
      trainable: Not used in this body.
    """
    # NOTE(review): the path is read from the instance, not from the argument.
    # Presumably the attribute is already populated before __init__ runs
    # (e.g. by a tuple-like base class's __new__) -- confirm against the class
    # definition.
    resolved_spec = module.as_module_spec(self.module_spec_path)
    self.module_spec = resolved_spec
    _check_module_is_text_embedding(resolved_spec)
    super(_TextEmbeddingColumn, self).__init__()
def __init__(self, key, module_spec_path, image_size):
    """Resolves and validates the image module spec for this column.

    Stores the spec returned by `module.as_module_spec` on
    `self.module_spec` and checks it with
    `_check_module_is_image_embedding`. The `check_image_size` flag passed to
    that check is True exactly when `self.image_size` is None. Finally runs
    the parent initializer with no arguments.

    Args:
      key: Not used in this body.
      module_spec_path: Not used directly in this body; the value is read
        from `self.module_spec_path` instead.
      image_size: Not used directly in this body; the value is read from
        `self.image_size` instead.
    """
    # NOTE(review): both values are read from the instance, not from the
    # arguments. Presumably the attributes are already populated before
    # __init__ runs (e.g. by a tuple-like base class's __new__) -- confirm
    # against the class definition.
    resolved_spec = module.as_module_spec(self.module_spec_path)
    self.module_spec = resolved_spec
    size_not_given = self.image_size is None
    _check_module_is_image_embedding(
        resolved_spec,
        check_image_size=size_not_given,
    )
    super(_ImageEmbeddingColumn, self).__init__()