예제 #1
0
def text_embedding_column(key, module_spec, trainable=False):
    """Builds a dense feature column that embeds string features with a Module.

    The column accepts an input feature whose values are strings of any
    length. Its output is whatever the module `m` created from `module_spec`
    yields for that input, i.e. `result = m(input)`. That `result` is required
    to be float32 with shape `[batch_size, num_features]`, where num_features
    is statically known.

    Example:

    ```python
      comment = text_embedding_column("comment", "/tmp/text-module")
      feature_columns = [comment, ...]
      ...
      features = {
        "comment": np.array(["wow, much amazing", "so easy", ...]),
        ...
      }
      labels = np.array([[1], [0], ...])
      # If running TF 2.x, use `tf.compat.v1.estimator.inputs.numpy_input_fn`
      input_fn = tf.estimator.inputs.numpy_input_fn(features, labels,
                                                    shuffle=True)
      estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns)
      estimator.train(input_fn, max_steps=100)
    ```

    Args:
      key: A string or `_FeatureColumn` naming the text feature.
      module_spec: Either a ModuleSpec describing the Module to instantiate,
        or a path from which a ModuleSpec is loaded via `load_module_spec`.
      trainable: Whether the Module's weights may be trained. Defaults to
        False, which keeps the pre-trained weights frozen. The ordinary
        tf.feature_column.embedding_column() behaves differently because it
        targets training from scratch.

    Returns:
      `_DenseColumn` that converts from text input.

    Raises:
      ValueError: if module_spec is not suitable for use in this feature
        column.
    """
    # Normalize the argument to a ModuleSpec, then validate it before it is
    # handed to the column.
    spec = module.as_module_spec(module_spec)
    _check_module_is_text_embedding(spec)
    return _TextEmbeddingColumn(key=key, module_spec=spec, trainable=trainable)
예제 #2
0
def text_embedding_column(key, module_spec, trainable=False):
  """Uses a Module to construct a dense representation from a text feature.

  This feature column can be used on an input feature whose values are strings
  of arbitrary size.

  The result of this feature column is the result of passing its `input`
  through the module `m` instantiated from `module_spec`, as per
  `result = m(input)`. The `result` must have dtype float32 and shape
  `[batch_size, num_features]` with a known value of num_features.

  Example:

  ```python
    comment = text_embedding_column("comment", "/tmp/text-module")
    feature_columns = [comment, ...]
    ...
    features = {
      "comment": np.array(["wow, much amazing", "so easy", ...]),
      ...
    }
    labels = np.array([[1], [0], ...])
    # If running TF 2.x, use `tf.compat.v1.estimator.inputs.numpy_input_fn`
    input_fn = tf.estimator.inputs.numpy_input_fn(features, labels,
                                                  shuffle=True)
    estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns)
    estimator.train(input_fn, max_steps=100)
  ```

  Args:
    key: A string or `_FeatureColumn` identifying the text feature.
    module_spec: A ModuleSpec defining the Module to instantiate or a path where
      to load a ModuleSpec via `load_module_spec`.
    trainable: Whether or not the Module is trainable. False by default,
      meaning the pre-trained weights are frozen. This is different from the
      ordinary tf.feature_column.embedding_column(), but that one is intended
      for training from scratch.

  Returns:
    `_DenseColumn` that converts from text input.

  Raises:
     ValueError: if module_spec is not suitable for use in this feature column.
  """
  # Normalize a path/handle into a ModuleSpec, then validate it before the
  # column is constructed.
  module_spec = module.as_module_spec(module_spec)
  _check_module_is_text_embedding(module_spec)
  return _TextEmbeddingColumn(key=key, module_spec=module_spec,
                              trainable=trainable)
예제 #3
0
def sequence_text_embedding_column(key, module_spec, trainable=False):
    """Sequence-aware counterpart of tensorflow_hub.text_embedding_column.

    Resolves `module_spec` with `module.as_module_spec`, validates it with
    `_check_module_is_sequence_text_embedding`, and wraps the result in a
    `_TextEmbeddingColumn` that is constructed with `signature='sequence'`.

    Args:
      key: Forwarded unchanged as the column's `key`.
      module_spec: Any value accepted by `module.as_module_spec`.
      trainable: Forwarded unchanged as the column's `trainable` flag.
        Defaults to False.

    Returns:
      A `_TextEmbeddingColumn` built with the 'sequence' signature, intended
      to act as a `_SequenceDenseColumn` that converts from text input.

    Raises:
      ValueError: if module_spec is not suitable for use in this feature
        column.
    """
    spec = module.as_module_spec(module_spec)
    _check_module_is_sequence_text_embedding(spec)
    return _TextEmbeddingColumn(
        key=key, module_spec=spec, trainable=trainable, signature='sequence')
예제 #4
0
def image_embedding_column(key, module_spec):
  """Builds a dense feature column that embeds image pixels with a Module.

  The column consumes images given as float32 tensors of RGB pixel values in
  the range [0,1]. Such input can come straight from a numeric_column() when
  the tf.Example data already holds decoded images that all share the shape
  [height, width, 3]. More often the input_fn decodes the images itself,
  resizes them (possibly after data augmentation such as random crops), and
  emits a batch shaped [batch_size, height, width, 3].

  The column's output is whatever the module `m` created from `module_spec`
  yields for that input, i.e. `result = m({"images": input})`. That `result`
  is required to be float32 with shape `[batch_size, num_features]`, where
  num_features is statically known.

  Example:

  ```python
    image_column = hub.image_embedding_column("embeddings", "/tmp/image-module")
    feature_columns = [image_column, ...]
    estimator = tf.estimator.LinearClassifier(feature_columns, ...)
    height, width = hub.get_expected_image_size(image_column.module_spec)
    input_fn = ...  # Provides "embeddings" with shape [None, height, width, 3].
    estimator.train(input_fn, ...)
  ```

  Args:
    key: A string or `_FeatureColumn` naming the input image data.
    module_spec: A string handle or a `ModuleSpec` that identifies the module.

  Returns:
    `_DenseColumn` that converts from pixel data.

  Raises:
    ValueError: if module_spec is not suitable for use in this feature column.
  """
  # Normalize the handle to a ModuleSpec, then validate it before it is
  # handed to the column.
  spec = module.as_module_spec(module_spec)
  _check_module_is_image_embedding(spec)
  return _ImageEmbeddingColumn(key=key, module_spec=spec)
예제 #5
0
def image_embedding_column(key, module_spec):
  """Creates a column yielding a dense 1-D embedding of image pixel data.

  Input images are expected as float32 tensors holding RGB pixel data in the
  range [0,1]. If the tf.Example input already contains decoded images of one
  common shape [height, width, 3], a numeric_column() can supply them.
  Typically, though, the input_fn explicitly decodes and resizes the images
  (optionally after augmentation such as random crops) and provides batches of
  shape [batch_size, height, width, 3].

  The module `m` instantiated from `module_spec` is applied to the input as
  `result = m({"images": input})`, and that `result` is what the column
  produces. It must have dtype float32 and shape
  `[batch_size, num_features]` with a known value of num_features.

  Example:

  ```python
    image_column = hub.image_embedding_column("embeddings", "/tmp/image-module")
    feature_columns = [image_column, ...]
    estimator = tf.estimator.LinearClassifier(feature_columns, ...)
    height, width = hub.get_expected_image_size(image_column.module_spec)
    input_fn = ...  # Provides "embeddings" with shape [None, height, width, 3].
    estimator.train(input_fn, ...)
  ```

  Args:
    key: A string or `_FeatureColumn` identifying the input image data.
    module_spec: A string handle or a `ModuleSpec` identifying the module.

  Returns:
    `_DenseColumn` that converts from pixel data.

  Raises:
    ValueError: if module_spec is not suitable for use in this feature column.
  """
  resolved_spec = module.as_module_spec(module_spec)
  # Reject modules that cannot serve as image embeddings before building the
  # column.
  _check_module_is_image_embedding(resolved_spec)
  return _ImageEmbeddingColumn(key=key, module_spec=resolved_spec)
예제 #6
0
def sparse_text_embedding_column(key,
                                 module_spec,
                                 combiner,
                                 default_value,
                                 trainable=False):
    """Uses a Module to construct dense representations from sparse text features.

    TODO(b/131678043): This does not work yet with TF2.

    The input to this feature column is a batch of multiple strings with
    arbitrary size, assuming the input is a SparseTensor.

    This type of feature column is typically suited for modules that operate
    on pre-tokenized text to produce token level embeddings which are combined
    with the combiner into a text embedding. The combiner always treats the
    tokens as a bag of words rather than a sequence.

    The output (i.e., transformed input layer) is a DenseTensor, with shape
    [batch_size, num_embedding_dim].

    For Example:

    ```python
      comment = hub.sparse_text_embedding_column(
          "comment", "/tmp/text_module", combiner="mean", default_value=None)
      feature_columns = [comment, ...]
      ...
      features = {
        "comment": tf.SparseTensor(indices=[[0, 0], [1, 2]],
                                   values=['sparse', 'embedding'],
                                   dense_shape=[3, 4]),
        ...
      }
      estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns)
    ```

    Args:
      key: A string or `_FeatureColumn` identifying the text feature.
      module_spec: A string handle or a `_ModuleSpec` identifying the module.
      combiner: a string specifying the reducing op for embeddings in the same
        Example. Must be one of 'mean', 'sqrtn' or 'sum'; any other value
        (including None) raises ValueError.
      default_value: default value for Examples where the text feature is
        empty. Note, it's recommended to have default_value consistent with
        OOV tokens, in case there was special handling of OOV in the text
        module. If None, the text feature is assumed to be non-empty for each
        Example.
      trainable: Whether or not the Module is trainable. False by default,
        meaning the pre-trained weights are frozen. This is different from the
        ordinary tf.feature_column.embedding_column(), but that one is
        intended for training from scratch.

    Returns:
      `_DenseColumn` that converts from text input.

    Raises:
      ValueError: if combiner not in ('mean', 'sqrtn', 'sum').
      ValueError: if module_spec is not suitable for use in this feature
        column.
    """
    # Validate the cheap argument first so an invalid combiner is reported
    # before the module spec is resolved.
    if combiner not in ("mean", "sqrtn", "sum"):
        # Wrap the operand in a 1-tuple: a tuple-valued combiner would
        # otherwise be unpacked as %-format arguments and either raise
        # TypeError or be misreported.
        raise ValueError("combiner must be 'mean', 'sqrtn' or 'sum': %r" %
                         (combiner,))
    module_spec = module.as_module_spec(module_spec)
    _check_module_is_text_embedding(module_spec)
    return _SparseTextEmbeddingColumn(key=key,
                                      module_spec=module_spec,
                                      trainable=trainable,
                                      default_value=default_value,
                                      combiner=combiner)
예제 #7
0
 def __init__(self, key, module_spec_path):
     """Resolves and validates the image module spec for this column.

     Args:
       key: Not referenced in this method body.
       module_spec_path: Not referenced directly in this method body; the
         body reads `self.module_spec_path` instead.
     """
     # NOTE(review): `self.module_spec_path` is read without being assigned
     # in this method, so it is presumably populated before `__init__` runs
     # (e.g. by a namedtuple-style `__new__` on the enclosing class, which is
     # outside this view) -- confirm.
     self.module_spec = module.as_module_spec(self.module_spec_path)
     # Validate the resolved spec before delegating to the parent initializer.
     _check_module_is_image_embedding(self.module_spec)
     super(_ImageEmbeddingColumn, self).__init__()
예제 #8
0
 def __init__(self, key, module_spec_path, trainable):
     """Resolves and validates the text module spec for this column.

     Args:
       key: Not referenced in this method body.
       module_spec_path: Not referenced directly in this method body; the
         body reads `self.module_spec_path` instead.
       trainable: Not referenced in this method body.
     """
     # NOTE(review): `self.module_spec_path` is read without being assigned
     # in this method, so it is presumably populated before `__init__` runs
     # (e.g. by a namedtuple-style `__new__` on the enclosing class, which is
     # outside this view) -- confirm.
     self.module_spec = module.as_module_spec(self.module_spec_path)
     # Validate the resolved spec before delegating to the parent initializer.
     _check_module_is_text_embedding(self.module_spec)
     super(_TextEmbeddingColumn, self).__init__()
예제 #9
0
 def __init__(self, key, module_spec_path, image_size):
     """Resolves and validates the image module spec for this column.

     Args:
       key: Not referenced in this method body.
       module_spec_path: Not referenced directly in this method body; the
         body reads `self.module_spec_path` instead.
       image_size: Not referenced directly in this method body; the body
         reads `self.image_size` instead.
     """
     # NOTE(review): `self.module_spec_path` and `self.image_size` are read
     # without being assigned in this method, so they are presumably
     # populated before `__init__` runs (e.g. by a namedtuple-style `__new__`
     # on the enclosing class, which is outside this view) -- confirm.
     self.module_spec = module.as_module_spec(self.module_spec_path)
     # `check_image_size` is True only when no explicit image size is set on
     # the instance; presumably the module must then supply its own expected
     # size -- verify against `_check_module_is_image_embedding`.
     _check_module_is_image_embedding(
         self.module_spec, check_image_size=self.image_size is None)
     super(_ImageEmbeddingColumn, self).__init__()