Exemple #1
0
def shared_embedding_columns(categorical_columns,
                             dimension,
                             combiner='mean',
                             initializer=None,
                             shared_embedding_collection_name=None):
    """Build one TPU shared-embedding column per categorical column.

    Each entry of `categorical_columns` is validated against
    `_SUPPORTED_CATEGORICAL_COLUMNS`. The inputs are then run through
    `fc_lib.shared_embedding_columns`; the `initializer` and
    `shared_embedding_collection_name` carried by its first result are the
    ones handed to every `_TPUSharedEmbeddingColumn` created here.

    Args:
      categorical_columns: Categorical columns to wrap. Iterated more than
        once, and `fc_lib.shared_embedding_columns` must return at least one
        column for them (its first result is indexed).
      dimension: Forwarded unchanged as `dimension` to both constructors.
      combiner: Forwarded unchanged as `combiner`; defaults to 'mean'.
      initializer: Forwarded to `fc_lib.shared_embedding_columns`; the TPU
        columns receive the initializer found on its first result.
      shared_embedding_collection_name: Forwarded to
        `fc_lib.shared_embedding_columns`; the TPU columns receive the name
        found on its first result.

    Returns:
      A list of `_TPUSharedEmbeddingColumn`, in the iteration order of
      `categorical_columns`.

    Raises:
      TypeError: If an entry is not an instance of one of
        `_SUPPORTED_CATEGORICAL_COLUMNS`.
    """
    for candidate in categorical_columns:
        if isinstance(candidate, _SUPPORTED_CATEGORICAL_COLUMNS):
            continue
        supported_names = ' or '.join(
            [cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS])
        raise TypeError(
            'categorical_column for tpu '
            ' shared_embedding_columns must be type %s, got %s.' %
            (supported_names, type(candidate)))

    # Keyword arguments that are pinned to the same constants for both the
    # fc_lib columns and the TPU columns.
    pinned_kwargs = dict(
        ckpt_to_load_from=None,
        tensor_name_in_ckpt=None,
        max_norm=None,
        trainable=True)

    reference_columns = fc_lib.shared_embedding_columns(
        categorical_columns,
        dimension,
        combiner=combiner,
        initializer=initializer,
        shared_embedding_collection_name=shared_embedding_collection_name,
        **pinned_kwargs)

    # The TPU version reuses the initializer and collection name held by the
    # first fc_lib column rather than the raw arguments.
    first_reference = reference_columns[0]
    resolved_initializer = first_reference.initializer
    resolved_name = first_reference.shared_embedding_collection_name

    # Create the state (_SharedEmbeddingColumnLayer) here.
    return [
        _TPUSharedEmbeddingColumn(
            categorical_column=source_column,
            dimension=dimension,
            combiner=combiner,
            initializer=resolved_initializer,
            shared_embedding_collection_name=resolved_name,
            **pinned_kwargs)
        for source_column in categorical_columns
    ]
Exemple #2
0
def shared_embedding_columns(categorical_columns,
                             dimension,
                             combiner='mean',
                             initializer=None,
                             shared_embedding_collection_name=None):
  """Build one TPU shared-embedding column per categorical column.

  Each entry of `categorical_columns` is validated against
  `_SUPPORTED_CATEGORICAL_COLUMNS`. The inputs are then run through
  `fc_lib.shared_embedding_columns`; the `initializer` and
  `shared_embedding_collection_name` carried by its first result are the ones
  handed to every `_TPUSharedEmbeddingColumn` created here.

  Args:
    categorical_columns: Categorical columns to wrap. Iterated more than once,
      and `fc_lib.shared_embedding_columns` must return at least one column
      for them (its first result is indexed).
    dimension: Forwarded unchanged as `dimension` to both constructors.
    combiner: Forwarded unchanged as `combiner`; defaults to 'mean'.
    initializer: Forwarded to `fc_lib.shared_embedding_columns`; the TPU
      columns receive the initializer found on its first result.
    shared_embedding_collection_name: Forwarded to
      `fc_lib.shared_embedding_columns`; the TPU columns receive the name
      found on its first result.

  Returns:
    A list of `_TPUSharedEmbeddingColumn`, in the iteration order of
    `categorical_columns`.

  Raises:
    TypeError: If an entry is not an instance of one of
      `_SUPPORTED_CATEGORICAL_COLUMNS`.
  """
  for candidate in categorical_columns:
    if isinstance(candidate, _SUPPORTED_CATEGORICAL_COLUMNS):
      continue
    supported_names = ' or '.join(
        [cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS])
    raise TypeError(
        'categorical_column for tpu '
        ' shared_embedding_columns must be type %s, got %s.' %
        (supported_names, type(candidate)))

  # Keyword arguments that are pinned to the same constants for both the
  # fc_lib columns and the TPU columns.
  pinned_kwargs = dict(
      ckpt_to_load_from=None,
      tensor_name_in_ckpt=None,
      max_norm=None,
      trainable=True)

  reference_columns = fc_lib.shared_embedding_columns(
      categorical_columns,
      dimension,
      combiner=combiner,
      initializer=initializer,
      shared_embedding_collection_name=shared_embedding_collection_name,
      **pinned_kwargs)

  # The TPU version reuses the initializer and collection name held by the
  # first fc_lib column rather than the raw arguments.
  first_reference = reference_columns[0]
  resolved_initializer = first_reference.initializer
  resolved_name = first_reference.shared_embedding_collection_name

  # Create the state (_SharedEmbeddingColumnLayer) here.
  return [
      _TPUSharedEmbeddingColumn(
          categorical_column=source_column,
          dimension=dimension,
          combiner=combiner,
          initializer=resolved_initializer,
          shared_embedding_collection_name=resolved_name,
          **pinned_kwargs)
      for source_column in categorical_columns
  ]
Exemple #3
0
  def test_encode_features(self):
    """Checks `encode_features` for two features sharing one embedding.

    Features "aaa" and "bbb" are identity categorical columns wrapped by
    `feature_column.shared_embedding_columns`. The test asserts the contents
    of the first global variable (the embedding table) and the per-feature
    lookups returned by `feature_lib.encode_features` in EVAL mode.
    """
    feature_keys = ("aaa", "bbb")

    # Plain-Python fixtures; nothing below creates graph ops.
    vocabulary_size = 4
    embedding_dimension = 2
    embedding_values = (
        (1., 2.),  # id 0
        (3., 5.),  # id 1
        (7., 11.),  # id 2
        (9., 13.),  # id 3
    )

    # Inputs, one row per example. -1 values are ignored.
    input_features = {
        "aaa": np.array([
            [3, -1, -1],  # example 0, ids [3]
            [0, 1, -1],  # example 1, ids [0, 1]
        ]),
        "bbb": np.array([
            [0, -1, -1],  # example 0, ids [0]
            [-1, -1, -1],  # example 1, ids []
        ]),
    }

    # Expected lookup result per feature, using combiner='mean'.
    expected_lookups = {
        "aaa": (
            (9., 13.),  # example 0: ids [3] -> [9, 13]
            (2., 3.5),  # example 1: ids [0, 1] -> mean([1, 2], [3, 5])
        ),
        "bbb": (
            (1., 2.),  # example 0: ids [0] -> [1, 2]
            (0., 0.),  # example 1: ids [] -> [0, 0]
        ),
    }

    def fixed_initializer(shape, dtype, partition_info):
      # Always yields the fixed table, whatever shape/dtype is requested.
      del shape, dtype, partition_info  # Unused.
      return embedding_values

    with tf.Graph().as_default():
      # Build columns: one identity column per feature, then share them.
      categorical_columns = [
          feature_column.categorical_column_with_identity(
              key=feature_key, num_buckets=vocabulary_size)
          for feature_key in feature_keys
      ]
      embed_column_a, embed_column_b = feature_column.shared_embedding_columns(
          categorical_columns,
          dimension=embedding_dimension,
          initializer=fixed_initializer,
          shared_embedding_collection_name="custom_collection_name")
      feature_columns = {"aaa": embed_column_a, "bbb": embed_column_b}

      cols_to_tensors = feature_lib.encode_features(
          input_features,
          feature_columns.values(),
          mode=tf.estimator.ModeKeys.EVAL)
      lookups = {
          feature_key: cols_to_tensors[feature_columns[feature_key]]
          for feature_key in feature_keys
      }

      # The test treats the first global variable as the embedding table.
      embedding_var = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)[0]

      # Assert expected embedding variable and lookups.
      with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        sess.run(tf.compat.v1.tables_initializer())
        self.assertAllEqual(embedding_values, embedding_var.eval())
        for feature_key in feature_keys:
          self.assertAllEqual(expected_lookups[feature_key],
                              lookups[feature_key].eval())