Example #1
def test():
    # `feature`, `label`, `pos` and `distance` are module-level fixtures in the
    # original test; they are not defined in this snippet.
    fea = tf.constant(feature)
    la = tf.constant(label)
    po = tf.constant(pos)
    dis = tf.constant(distance)  # assumed fixture: the original used `dis` below without defining it
    features = {
        params.feature_name: dense_to_sparse_tensor(fea),
        params.pos_name: dense_to_sparse_tensor(po),
        params.distance_name: dense_to_sparse_tensor(dis)
    }
    if tf.executing_eagerly():
        print("feature is \r\n {}".format(features[params.feature_name]))
        print("pos is \r\n {}".format(features[params.pos_name]))
    model_fn(features, la, tf.estimator.ModeKeys.TRAIN, config, params)
Example #2
 def testBowEncoderSparseTensor(self):
   with self.cached_session() as sess:
     docs = [[0, 1], [2, 3]]
     sparse_docs = sparse_ops.dense_to_sparse_tensor(docs)
     enc = encoders.bow_encoder(sparse_docs, 4, 3)
     sess.run(variables.global_variables_initializer())
     self.assertAllEqual([2, 3], enc.eval().shape)
Example #3
 def testBowEncoderSparseTensor(self):
     with self.test_session() as sess:
         docs = [[0, 1], [2, 3]]
         sparse_docs = sparse_ops.dense_to_sparse_tensor(docs)
         enc = encoders.bow_encoder(sparse_docs, 4, 3)
         sess.run(variables.global_variables_initializer())
         self.assertAllEqual([2, 3], enc.eval().shape)
Example #4
def bow_encoder(ids,
                vocab_size,
                embed_dim,
                sparse_lookup=True,
                initializer=None,
                regularizer=None,
                trainable=True,
                scope=None,
                reuse=None):
    """Maps a sequence of symbols to a vector per example by averaging embeddings.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` or `SparseTensor` of type
      `int32` or `int64` with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    sparse_lookup: `bool`, if `True`, converts ids to a `SparseTensor`
        and performs a sparse embedding lookup. This is usually faster,
        but not desirable if padding tokens should have an embedding. Empty rows
        are assigned a special embedding.
    initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    Encoding `Tensor` `[batch_size, embed_dim]` produced by
    averaging embeddings.

  Raises:
    ValueError: If `embed_dim` or `vocab_size` are not specified.
  """
    if not vocab_size or not embed_dim:
        raise ValueError('Must specify vocab size and embedding dimension')
    with variable_scope.variable_scope(scope,
                                       'bow_encoder', [ids],
                                       reuse=reuse):
        embeddings = variables.model_variable('embeddings',
                                              shape=[vocab_size, embed_dim],
                                              initializer=initializer,
                                              regularizer=regularizer,
                                              trainable=trainable)
        if sparse_lookup:
            if isinstance(ids, ops.SparseTensor):
                sparse_ids = ids
            else:
                sparse_ids = sparse_ops.dense_to_sparse_tensor(ids)
            return contrib_embedding_ops.safe_embedding_lookup_sparse(
                [embeddings], sparse_ids, combiner='mean', default_id=0)
        else:
            if isinstance(ids, ops.SparseTensor):
                raise TypeError('ids are expected to be dense Tensor, got: %s',
                                ids)
        return math_ops.reduce_mean(
            embedding_ops.embedding_lookup(embeddings, ids),
            reduction_indices=1)
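The docstring above describes the encoder as averaging one embedding per symbol into a single vector per example. A minimal NumPy sketch of that reduction for the dense-lookup path (the array names and values here are illustrative, not part of the library):

# Sketch only: reproduces the "average the embedding rows of each document" idea
# for a dense [batch_size, doc_length] ids matrix, assuming an already-built
# embedding table. Mirrors the shapes used in the tests above (vocab 4, dim 3).
import numpy as np

vocab_size, embed_dim = 4, 3
embeddings = np.random.RandomState(0).randn(vocab_size, embed_dim)

ids = np.array([[0, 1], [2, 3]])          # [batch_size=2, doc_length=2]
encoding = embeddings[ids].mean(axis=1)   # [batch_size, embed_dim]
print(encoding.shape)                     # (2, 3), the shape asserted in testBowEncoderSparseTensor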
Example #5
 def test_dense_to_sparse_tensor_1d_no_shape(self):
   with self.test_session() as sess:
     tensor = array_ops.placeholder(shape=[None], dtype=dtypes.int32)
     st = sparse_ops.dense_to_sparse_tensor(tensor)
     result = sess.run(st, feed_dict={tensor: [0, 100, 0, 3]})
   self.assertAllEqual([[1], [3]], result.indices)
   self.assertAllEqual([100, 3], result.values)
   self.assertAllEqual([4], result.dense_shape)
Example #6
 def test_dense_to_sparse_tensor_2d(self):
   with self.test_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor([[1, 2, 0, 0], [3, 4, 5, 0]])
     result = sess.run(st)
   self.assertAllEqual([[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]],
                       result.indices)
   self.assertAllEqual([1, 2, 3, 4, 5], result.values)
   self.assertAllEqual([2, 4], result.dense_shape)
Example #7
 def test_dense_to_sparse_tensor_1d_no_shape(self):
     with self.test_session() as sess:
         tensor = array_ops.placeholder(shape=[None], dtype=dtypes.int32)
         st = sparse_ops.dense_to_sparse_tensor(tensor)
         result = sess.run(st, feed_dict={tensor: [0, 100, 0, 3]})
     self.assertAllEqual([[1], [3]], result.indices)
     self.assertAllEqual([100, 3], result.values)
     self.assertAllEqual([4], result.dense_shape)
Example #8
def bow_encoder(ids,
                vocab_size,
                embed_dim,
                sparse_lookup=True,
                initializer=None,
                regularizer=None,
                trainable=True,
                scope=None,
                reuse=None):
  """Maps a sequence of symbols to a vector per example by averaging embeddings.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` or `SparseTensor` of type
      `int32` or `int64` with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    sparse_lookup: `bool`, if `True`, converts ids to a `SparseTensor`
        and performs a sparse embedding lookup. This is usually faster,
        but not desirable if padding tokens should have an embedding. Empty rows
        are assigned a special embedding.
    initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    Encoding `Tensor` `[batch_size, embed_dim]` produced by
    averaging embeddings.

  Raises:
    ValueError: If `embed_dim` or `vocab_size` are not specified.
  """
  if not vocab_size or not embed_dim:
    raise ValueError('Must specify vocab size and embedding dimension')
  with variable_scope.variable_scope(
      scope, 'bow_encoder', [ids], reuse=reuse):
    embeddings = variables.model_variable(
        'embeddings', shape=[vocab_size, embed_dim],
        initializer=initializer, regularizer=regularizer,
        trainable=trainable)
    if sparse_lookup:
      if isinstance(ids, sparse_tensor.SparseTensor):
        sparse_ids = ids
      else:
        sparse_ids = sparse_ops.dense_to_sparse_tensor(ids)
      return contrib_embedding_ops.safe_embedding_lookup_sparse(
          [embeddings], sparse_ids, combiner='mean', default_id=0)
    else:
      if isinstance(ids, sparse_tensor.SparseTensor):
        raise TypeError('ids are expected to be dense Tensor, got: %s', ids)
      return math_ops.reduce_mean(
          embedding_ops.embedding_lookup(embeddings, ids),
          reduction_indices=1)
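The `sparse_lookup` note in the docstring (padding tokens are dropped by the sparse path but averaged in by the dense path, assuming 0 is the padding id, which `dense_to_sparse_tensor` treats as implicit by default) can be seen in a small NumPy sketch; the names and values are illustrative, not library code:

# Sketch only: contrast the two lookup paths on a zero-padded document.
# The sparse path's combiner='mean' averages over the real tokens only, while
# the dense path also averages in the embedding of padding id 0.
import numpy as np

embeddings = np.arange(12, dtype=np.float32).reshape(4, 3)  # vocab_size=4, embed_dim=3
doc = np.array([2, 3, 0, 0])                                # one document, padded with id 0

dense_path = embeddings[doc].mean(axis=0)           # padding embeddings included
real_tokens = doc[doc != 0]                         # what the sparse conversion keeps
sparse_path = embeddings[real_tokens].mean(axis=0)  # mean over real tokens only

print(dense_path)   # [3.75 4.75 5.75]
print(sparse_path)  # [7.5 8.5 9.5]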
Example #9
 def test_dense_to_sparse_unknown_rank(self):
     ph = array_ops.placeholder(dtype=dtypes.int32)
     with self.cached_session() as sess:
         st = sparse_ops.dense_to_sparse_tensor(ph)
         result = sess.run(st, feed_dict={ph: [[1, 2, 0, 0], [3, 4, 5, 0]]})
     self.assertAllEqual([[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]],
                         result.indices)
     self.assertAllEqual([1, 2, 3, 4, 5], result.values)
     self.assertAllEqual([2, 4], result.dense_shape)
Example #10
 def test_dense_to_sparse_unknown_rank(self):
   ph = array_ops.placeholder(dtype=dtypes.int32)
   with self.cached_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor(ph)
     result = sess.run(st, feed_dict={ph: [[1, 2, 0, 0], [3, 4, 5, 0]]})
   self.assertAllEqual([[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]],
                       result.indices)
   self.assertAllEqual([1, 2, 3, 4, 5], result.values)
   self.assertAllEqual([2, 4], result.dense_shape)
Example #11
 def test_dense_to_sparse_tensor_1d_str(self):
     with self.test_session() as sess:
         st = sparse_ops.dense_to_sparse_tensor([b'qwe', b'', b'ewq', b''])
         result = sess.run(st)
     self.assertEqual(result.indices.dtype, np.int64)
     self.assertEqual(result.values.dtype, np.object)
     self.assertEqual(result.dense_shape.dtype, np.int64)
     self.assertAllEqual([[0], [2]], result.indices)
     self.assertAllEqual([b'qwe', b'ewq'], result.values)
     self.assertAllEqual([4], result.dense_shape)
Example #12
 def test_dense_to_sparse_tensor_1d_bool(self):
   with self.test_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor([True, False, True, False])
     result = sess.run(st)
   self.assertEqual(result.indices.dtype, np.int64)
   self.assertEqual(result.values.dtype, np.bool)
   self.assertEqual(result.dense_shape.dtype, np.int64)
   self.assertAllEqual([[0], [2]], result.indices)
   self.assertAllEqual([True, True], result.values)
   self.assertAllEqual([4], result.dense_shape)
Example #13
 def test_dense_to_sparse_tensor_1d(self):
     with self.test_session() as sess:
         st = sparse_ops.dense_to_sparse_tensor([1, 0, 2, 0])
         result = sess.run(st)
     self.assertEqual(result.indices.dtype, np.int64)
     self.assertEqual(result.values.dtype, np.int32)
     self.assertEqual(result.dense_shape.dtype, np.int64)
     self.assertAllEqual([[0], [2]], result.indices)
     self.assertAllEqual([1, 2], result.values)
     self.assertAllEqual([4], result.dense_shape)
Example #14
 def test_dense_to_sparse_tensor_1d_str(self):
   with self.test_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor([b'qwe', b'', b'ewq', b''])
     result = sess.run(st)
   self.assertEqual(result.indices.dtype, np.int64)
   self.assertEqual(result.values.dtype, np.object)
   self.assertEqual(result.dense_shape.dtype, np.int64)
   self.assertAllEqual([[0], [2]], result.indices)
   self.assertAllEqual([b'qwe', b'ewq'], result.values)
   self.assertAllEqual([4], result.dense_shape)
Example #15
 def test_dense_to_sparse_tensor_1d_float(self):
     with self.test_session() as sess:
         st = sparse_ops.dense_to_sparse_tensor([1.5, 0.0, 2.3, 0.0])
         result = sess.run(st)
     self.assertEqual(result.indices.dtype, np.int64)
     self.assertEqual(result.values.dtype, np.float32)
     self.assertEqual(result.dense_shape.dtype, np.int64)
     self.assertAllEqual([[0], [2]], result.indices)
     self.assertAllClose([1.5, 2.3], result.values)
     self.assertAllEqual([4], result.dense_shape)
Example #16
 def test_dense_to_sparse_tensor_1d_bool(self):
     with self.test_session() as sess:
         st = sparse_ops.dense_to_sparse_tensor([True, False, True, False])
         result = sess.run(st)
     self.assertEqual(result.indices.dtype, np.int64)
     self.assertEqual(result.values.dtype, np.bool)
     self.assertEqual(result.dense_shape.dtype, np.int64)
     self.assertAllEqual([[0], [2]], result.indices)
     self.assertAllEqual([True, True], result.values)
     self.assertAllEqual([4], result.dense_shape)
Example #17
 def test_dense_to_sparse_tensor_1d(self):
   with self.test_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor([1, 0, 2, 0])
     result = sess.run(st)
   self.assertEqual(result.indices.dtype, np.int64)
   self.assertEqual(result.values.dtype, np.int32)
   self.assertEqual(result.dense_shape.dtype, np.int64)
   self.assertAllEqual([[0], [2]], result.indices)
   self.assertAllEqual([1, 2], result.values)
   self.assertAllEqual([4], result.dense_shape)
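For readers on TF 2.x, where this contrib op is gone, the closest public equivalent appears to be `tf.sparse.from_dense`; a short eager sketch, not part of the tests above:

# Sketch only: the same 1-D conversion with the public TF 2.x API, run eagerly.
# Assumes tf.sparse.from_dense's default of treating zero entries as implicit,
# which matches the contrib op's default ignore_value for ints.
import tensorflow as tf

st = tf.sparse.from_dense(tf.constant([1, 0, 2, 0]))
print(st.indices.numpy())      # positions of the non-zero entries: [[0], [2]]
print(st.values.numpy())       # [1 2]
print(st.dense_shape.numpy())  # [4]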
Example #18
 def test_dense_to_sparse_tensor_1d_float(self):
   with self.test_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor([1.5, 0.0, 2.3, 0.0])
     result = sess.run(st)
   self.assertEqual(result.indices.dtype, np.int64)
   self.assertEqual(result.values.dtype, np.float32)
   self.assertEqual(result.dense_shape.dtype, np.int64)
   self.assertAllEqual([[0], [2]], result.indices)
   self.assertAllClose([1.5, 2.3], result.values)
   self.assertAllEqual([4], result.dense_shape)
Example #19
 def test_dense_to_sparse_tensor_1d_str_special_ignore(self):
   with self.test_session() as sess:
     st = sparse_ops.dense_to_sparse_tensor(
         [b'qwe', b'', b'ewq', b''], ignore_value=b'qwe')
     result = sess.run(st)
   self.assertEqual(result.indices.dtype, np.int64)
   self.assertEqual(result.values.dtype, np.object)
   self.assertEqual(result.shape.dtype, np.int64)
   self.assertAllEqual([[1], [2], [3]], result.indices)
   self.assertAllEqual([b'', b'ewq', b''], result.values)
   self.assertAllEqual([4], result.shape)
Example #20
 def test_dense_to_sparse_tensor_1d_str_special_ignore(self):
     with self.cached_session() as sess:
         st = sparse_ops.dense_to_sparse_tensor([b'qwe', b'', b'ewq', b''],
                                                ignore_value=b'qwe')
         result = sess.run(st)
     self.assertEqual(result.indices.dtype, np.int64)
     self.assertEqual(result.values.dtype, np.object)
     self.assertEqual(result.dense_shape.dtype, np.int64)
     self.assertAllEqual([[1], [2], [3]], result.indices)
     self.assertAllEqual([b'', b'ewq', b''], result.values)
     self.assertAllEqual([4], result.dense_shape)
Example #21
 def test_dense_to_sparse_tensor_3d_no_shape(self):
   with self.test_session() as sess:
     tensor = tf.placeholder(shape=[None, None, None], dtype=tf.int32)
     st = sparse_ops.dense_to_sparse_tensor(tensor)
     result = sess.run(st,
                       feed_dict={
                           tensor: [[[1, 2, 0, 0], [3, 4, 5, 0]],
                                    [[7, 8, 0, 0], [9, 0, 0, 0]]]
                       })
   self.assertAllEqual([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [0, 1, 2],
                        [1, 0, 0], [1, 0, 1], [1, 1, 0]], result.indices)
   self.assertAllEqual([1, 2, 3, 4, 5, 7, 8, 9], result.values)
   self.assertAllEqual([2, 2, 4], result.shape)
Example #22
 def test_dense_to_sparse_tensor_3d_no_shape(self):
   with self.test_session() as sess:
     tensor = tf.placeholder(shape=[None, None, None], dtype=tf.int32)
     st = sparse_ops.dense_to_sparse_tensor(tensor)
     result = sess.run(st,
                       feed_dict={
                           tensor: [[[1, 2, 0, 0], [3, 4, 5, 0]],
                                    [[7, 8, 0, 0], [9, 0, 0, 0]]]
                       })
   self.assertAllEqual([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [0, 1, 2],
                        [1, 0, 0], [1, 0, 1], [1, 1, 0]], result.indices)
   self.assertAllEqual([1, 2, 3, 4, 5, 7, 8, 9], result.values)
   self.assertAllEqual([2, 2, 4], result.shape)
Example #23
  def _prepare_inputs_for_fit_sparse(self,
                                     model_matrix,
                                     response,
                                     model_coefficients_start=None,
                                     convert_to_sparse_tensor=False):
    if model_coefficients_start is None:
      model_coefficients_start = np.zeros(model_matrix.shape[:-2] +
                                          model_matrix.shape[-1:])
    if convert_to_sparse_tensor:
      model_matrix = sparse_ops.dense_to_sparse_tensor(model_matrix)

    model_matrix = self._adjust_dtype_and_shape_hints(model_matrix)
    response = self._adjust_dtype_and_shape_hints(response)
    model_coefficients_start = self._adjust_dtype_and_shape_hints(
        model_coefficients_start)

    return model_matrix, response, model_coefficients_start
Example #24
  def _prepare_inputs_for_fit_sparse(self,
                                     model_matrix,
                                     response,
                                     model_coefficients_start=None,
                                     convert_to_sparse_tensor=False):
    if model_coefficients_start is None:
      model_coefficients_start = np.zeros(model_matrix.shape[:-2] +
                                          model_matrix.shape[-1:])
    if convert_to_sparse_tensor:
      model_matrix = sparse_ops.dense_to_sparse_tensor(model_matrix)

    model_matrix = self._adjust_dtype_and_shape_hints(model_matrix)
    response = self._adjust_dtype_and_shape_hints(response)
    model_coefficients_start = self._adjust_dtype_and_shape_hints(
        model_coefficients_start)

    return model_matrix, response, model_coefficients_start
Example #25
    def verify_sparse_dense_matmul(self, x_, y_):
        if self.use_sparse_tensor:
            x = self._make_sparse_placeholder(
                sparse_ops.dense_to_sparse_tensor(x_))
        else:
            x = self._make_placeholder(x_)

        y = self._make_placeholder(y_)

        z = tfp.math.sparse_or_dense_matmul(x, y)
        z_ = self.evaluate(z)

        if self.use_static_shape:
            batch_shape = x_.shape[:-2]
            self.assertAllEqual(z_.shape,
                                batch_shape + (x_.shape[-2], y_.shape[-1]))

        self.assertAllClose(z_, np.matmul(x_, y_), atol=0., rtol=1e-3)
Example #26
    def to_sparse_tensor(self, input_tensor):
        """Creates a SparseTensor from the bucketized Tensor."""
        dimension = self.source_column.dimension
        batch_size = array_ops.shape(input_tensor, name="shape")[0]

        if len(input_tensor.get_shape()) > 2:
            return sparse_ops.dense_to_sparse_tensor(input_tensor,
                                                     ignore_value=-2**31)

        if dimension > 1:
            i1 = array_ops.reshape(
                array_ops.tile(
                    array_ops.expand_dims(
                        math_ops.range(0, batch_size), 1, name="expand_dims"),
                    [1, dimension],
                    name="tile"),
                [-1],
                name="reshape")
            i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size],
                                name="tile")
            # Flatten the bucket indices and unique them across dimensions
            # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
            bucket_indices = array_ops.reshape(
                input_tensor, [-1], name="reshape") + self.length * i2
        else:
            # Simpler indices when dimension=1
            i1 = math_ops.range(0, batch_size)
            i2 = array_ops.zeros([batch_size],
                                 dtype=dtypes.int32,
                                 name="zeros")
            bucket_indices = array_ops.reshape(input_tensor, [-1],
                                               name="reshape")

        indices = math_ops.to_int64(
            array_ops.transpose(array_ops.stack((i1, i2))))
        shape = math_ops.to_int64(array_ops.stack([batch_size, dimension]))
        sparse_id_values = sparse_tensor_py.SparseTensor(
            indices, bucket_indices, shape)

        return sparse_id_values
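The per-dimension offset described in the comments above (with `k` buckets, the 2nd dimension's indices land in `k` to `2*k-1`) is easy to check with a standalone sketch; the values below are illustrative and the names are not from the library:

# Sketch only: how flattened bucket ids are kept distinct across dimensions by
# adding dimension_index * num_buckets, as to_sparse_tensor does above.
import numpy as np

num_buckets = 5                        # plays the role of self.length
bucketized = np.array([[1, 4, 0],      # [batch_size=2, dimension=3] bucket ids
                       [3, 2, 4]])
batch_size, dimension = bucketized.shape

i2 = np.tile(np.arange(dimension), batch_size)              # column index per flattened entry
bucket_indices = bucketized.reshape(-1) + num_buckets * i2
print(bucket_indices)  # [ 1  9 10  3  7 14]: column 1 shifted to 5..9, column 2 to 10..14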
Example #27
 def testBowEncoderSparseTensorDenseLookup(self):
   with self.cached_session():
     docs = [[0, 1]]
     sparse_docs = sparse_ops.dense_to_sparse_tensor(docs)
     with self.assertRaises(TypeError):
       encoders.bow_encoder(sparse_docs, 4, 3, sparse_lookup=False)
Example #28
 def test_convert_to_sparse_undef_shape(self):
     with self.test_session():
         with self.assertRaises(ValueError):
             tensor = array_ops.placeholder(dtype=dtypes.int32)
             sparse_ops.dense_to_sparse_tensor(tensor)
Example #29
 def test_convert_to_sparse_undef_shape(self):
   with self.test_session():
     with self.assertRaises(ValueError):
       tensor = array_ops.placeholder(dtype=dtypes.int32)
       sparse_ops.dense_to_sparse_tensor(tensor)
Example #30
 def testBowEncoderSparseTensorDenseLookup(self):
     with self.test_session():
         docs = [[0, 1]]
         sparse_docs = sparse_ops.dense_to_sparse_tensor(docs)
         with self.assertRaises(TypeError):
             encoders.bow_encoder(sparse_docs, 4, 3, sparse_lookup=False)