def testBowEncoderSparseTensor(self):
   """bow_encoder accepts `SparseTensor` ids and returns one row per document.

   Two documents are encoded with an embedding dimension of 3, so the
   evaluated encoding is expected to have shape `[2, 3]`.
   """
   with self.cached_session() as session:
     sparse_ids = sparse_ops.dense_to_sparse_tensor([[0, 1], [2, 3]])
     encoded = encoders.bow_encoder(sparse_ids, vocab_size=4, embed_dim=3)
     # The embedding variable must be initialized before evaluation.
     session.run(variables.global_variables_initializer())
     encoded_shape = encoded.eval().shape
     self.assertAllEqual([2, 3], encoded_shape)
def bow_encoder(ids,
                vocab_size,
                embed_dim,
                sparse_lookup=True,
                initializer=None,
                regularizer=None,
                trainable=True,
                scope=None,
                reuse=None):
    """Maps a sequence of symbols to a vector per example by averaging embeddings.

    Args:
      ids: `[batch_size, doc_length]` `Tensor` or `SparseTensor` of type
        `int32` or `int64` with symbol ids.
      vocab_size: Integer number of symbols in vocabulary.
      embed_dim: Integer number of dimensions for embedding matrix.
      sparse_lookup: `bool`, if `True`, converts ids to a `SparseTensor`
        and performs a sparse embedding lookup. This is usually faster,
        but not desirable if padding tokens should have an embedding. Empty
        rows are assigned a special embedding.
      initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
      regularizer: Optional regularizer for the embeddings.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional string specifying the variable scope for the op,
        required if `reuse=True`.
      reuse: If `True`, variables inside the op will be reused.

    Returns:
      Encoding `Tensor` `[batch_size, embed_dim]` produced by
      averaging embeddings.

    Raises:
      ValueError: If `embed_dim` or `vocab_size` are not specified.
      TypeError: If `sparse_lookup` is `False` and `ids` is a `SparseTensor`.
    """
    if not vocab_size or not embed_dim:
        raise ValueError('Must specify vocab size and embedding dimension')
    with variable_scope.variable_scope(scope,
                                       'bow_encoder', [ids],
                                       reuse=reuse):
        embeddings = variables.model_variable('embeddings',
                                              shape=[vocab_size, embed_dim],
                                              initializer=initializer,
                                              regularizer=regularizer,
                                              trainable=trainable)
        if sparse_lookup:
            # Already-sparse ids are used as-is; dense ids are converted
            # first.
            if isinstance(ids, sparse_tensor.SparseTensor):
                sparse_ids = ids
            else:
                sparse_ids = sparse_ops.dense_to_sparse_tensor(ids)
            # 'mean' combiner averages the embeddings of each row's ids.
            return contrib_embedding_ops.safe_embedding_lookup_sparse(
                [embeddings], sparse_ids, combiner='mean', default_id=0)

        # Dense lookup path: a SparseTensor cannot be fed to
        # embedding_lookup followed by reduce_mean over axis 1.
        if isinstance(ids, sparse_tensor.SparseTensor):
            # Interpolate `ids` into the message; passing it as a second
            # exception argument would leave the '%s' placeholder unformatted.
            raise TypeError(
                'ids are expected to be dense Tensor, got: %s' % (ids,))
        return math_ops.reduce_mean(
            embedding_ops.embedding_lookup(embeddings, ids), axis=1)
Esempio n. 3
0
 def test_dense_to_sparse_tensor_2d(self):
   """Converts a 2-D nested list and checks indices, values and dense shape.

   The expected result keeps only the non-zero entries of the input.
   """
   dense_input = [[1, 2, 0, 0], [3, 4, 5, 0]]
   with self.cached_session() as session:
     sparse_value = session.run(sparse_ops.dense_to_sparse_tensor(dense_input))
   expected_indices = [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
   self.assertAllEqual(expected_indices, sparse_value.indices)
   self.assertAllEqual([1, 2, 3, 4, 5], sparse_value.values)
   self.assertAllEqual([2, 4], sparse_value.dense_shape)
Esempio n. 4
0
 def test_dense_to_sparse_tensor_unknown_1d_shape(self):
   """Converts a rank-1 placeholder whose length is only known at feed time."""
   with self.cached_session() as session:
     dense_ph = array_ops.placeholder(shape=[None], dtype=dtypes.int32)
     sparse_op = sparse_ops.dense_to_sparse_tensor(dense_ph)
     fed_values = {dense_ph: [0, 100, 0, 3]}
     sparse_value = session.run(sparse_op, feed_dict=fed_values)
   # Only the non-zero positions (1 and 3) are expected in the result.
   self.assertAllEqual([[1], [3]], sparse_value.indices)
   self.assertAllEqual([100, 3], sparse_value.values)
   self.assertAllEqual([4], sparse_value.dense_shape)
Esempio n. 5
0
 def test_dense_to_sparse_unknown_rank(self):
   """Converts a placeholder declared without any shape information."""
   # No `shape` argument is given, so the rank is not fixed at graph time.
   dense_ph = array_ops.placeholder(dtype=dtypes.int32)
   with self.cached_session() as session:
     sparse_op = sparse_ops.dense_to_sparse_tensor(dense_ph)
     fed_values = {dense_ph: [[1, 2, 0, 0], [3, 4, 5, 0]]}
     sparse_value = session.run(sparse_op, feed_dict=fed_values)
   expected_indices = [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
   self.assertAllEqual(expected_indices, sparse_value.indices)
   self.assertAllEqual([1, 2, 3, 4, 5], sparse_value.values)
   self.assertAllEqual([2, 4], sparse_value.dense_shape)
Esempio n. 6
0
 def test_dense_to_sparse_tensor_1d_str(self):
   """Converts a 1-D list of byte strings; empty strings are expected dropped.

   Also checks the dtypes of the three components of the evaluated result.
   """
   with self.cached_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor([b'qwe', b'', b'ewq', b''])
     result = sess.run(st)
   self.assertEqual(result.indices.dtype, np.int64)
   # `np.object` was a deprecated alias of the builtin `object` and has been
   # removed from NumPy; `np.object_` is the stable scalar type.
   self.assertEqual(result.values.dtype, np.object_)
   self.assertEqual(result.dense_shape.dtype, np.int64)
   self.assertAllEqual([[0], [2]], result.indices)
   self.assertAllEqual([b'qwe', b'ewq'], result.values)
   self.assertAllEqual([4], result.dense_shape)
Esempio n. 7
0
 def test_dense_to_sparse_tensor_1d_bool(self):
   """Converts a 1-D list of booleans; `False` entries are expected dropped.

   Also checks the dtypes of the three components of the evaluated result.
   """
   with self.cached_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor([True, False, True, False])
     result = sess.run(st)
   self.assertEqual(result.indices.dtype, np.int64)
   # `np.bool` was a deprecated alias of the builtin `bool` that some NumPy
   # releases do not provide; `np.bool_` is available in all of them.
   self.assertEqual(result.values.dtype, np.bool_)
   self.assertEqual(result.dense_shape.dtype, np.int64)
   self.assertAllEqual([[0], [2]], result.indices)
   self.assertAllEqual([True, True], result.values)
   self.assertAllEqual([4], result.dense_shape)
Esempio n. 8
0
 def test_dense_to_sparse_tensor_1d_float(self):
   """Converts a 1-D list of floats and checks dtypes and contents.

   Values are compared approximately because they are floating point.
   """
   dense_input = [1.5, 0.0, 2.3, 0.0]
   with self.cached_session() as session:
     sparse_value = session.run(sparse_ops.dense_to_sparse_tensor(dense_input))
   # Component dtypes.
   self.assertEqual(sparse_value.indices.dtype, np.int64)
   self.assertEqual(sparse_value.values.dtype, np.float32)
   self.assertEqual(sparse_value.dense_shape.dtype, np.int64)
   # Component contents: only positions 0 and 2 are expected to remain.
   self.assertAllEqual([[0], [2]], sparse_value.indices)
   self.assertAllClose([1.5, 2.3], sparse_value.values)
   self.assertAllEqual([4], sparse_value.dense_shape)
Esempio n. 9
0
 def test_dense_to_sparse_tensor_1d(self):
   """Converts a 1-D list of ints and checks dtypes and contents."""
   dense_input = [1, 0, 2, 0]
   with self.cached_session() as session:
     sparse_value = session.run(sparse_ops.dense_to_sparse_tensor(dense_input))
   # Component dtypes.
   self.assertEqual(sparse_value.indices.dtype, np.int64)
   self.assertEqual(sparse_value.values.dtype, np.int32)
   self.assertEqual(sparse_value.dense_shape.dtype, np.int64)
   # Component contents: only positions 0 and 2 are expected to remain.
   self.assertAllEqual([[0], [2]], sparse_value.indices)
   self.assertAllEqual([1, 2], sparse_value.values)
   self.assertAllEqual([4], sparse_value.dense_shape)
Esempio n. 10
0
 def test_dense_to_sparse_tensor_unknown_3d_shape(self):
   """Converts a rank-3 placeholder whose dimensions are all unknown."""
   with self.cached_session() as session:
     dense_ph = array_ops.placeholder(
         shape=[None, None, None], dtype=dtypes.int32)
     sparse_op = sparse_ops.dense_to_sparse_tensor(dense_ph)
     fed_values = {
         dense_ph: [[[1, 2, 0, 0], [3, 4, 5, 0]],
                    [[7, 8, 0, 0], [9, 0, 0, 0]]]
     }
     sparse_value = session.run(sparse_op, feed_dict=fed_values)
   # One index triple per non-zero entry of the fed value, in row-major order.
   expected_indices = [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [0, 1, 2],
                       [1, 0, 0], [1, 0, 1], [1, 1, 0]]
   self.assertAllEqual(expected_indices, sparse_value.indices)
   self.assertAllEqual([1, 2, 3, 4, 5, 7, 8, 9], sparse_value.values)
   self.assertAllEqual([2, 2, 4], sparse_value.dense_shape)
 def testBowEncoderSparseTensorDenseLookup(self):
   """bow_encoder rejects `SparseTensor` ids when `sparse_lookup=False`."""
   with self.cached_session():
     sparse_ids = sparse_ops.dense_to_sparse_tensor([[0, 1]])
     # A dense lookup on sparse ids is expected to fail with TypeError.
     with self.assertRaises(TypeError):
       encoders.bow_encoder(
           sparse_ids, vocab_size=4, embed_dim=3, sparse_lookup=False)