Esempio n. 1
0
 def testBowEncoderSparseTensorDenseLookup(self):
     """Sparse-converted ids with sparse_lookup=False must raise TypeError."""
     with self.test_session():
         dense_ids = [[0, 1]]
         to_sparse = tf.contrib.layers.sparse_ops.dense_to_sparse_tensor
         sparse_ids = to_sparse(dense_ids)
         # The encoder is expected to reject this input when dense lookup
         # is requested.
         with self.assertRaises(TypeError):
             encoders.bow_encoder(sparse_ids, 4, 3, sparse_lookup=False)
Esempio n. 2
0
 def testBowEncodersSharingEmbeddings(self):
     """Two encoders in scope "test" (the second with reuse=True) agree."""
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3]]
         first = encoders.bow_encoder(doc_ids, 4, 3, scope="test")
         second = encoders.bow_encoder(
             doc_ids, 4, 3, scope="test", reuse=True)
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         # Evaluate both encoders in a single run call and compare them.
         first_out, second_out = session.run([first, second])
         self.assertAllEqual(first_out, second_out)
Esempio n. 3
0
 def testBowEncodersSharingEmbeddings(self):
   """Two encoders in scope 'test' (the second with reuse=True) agree."""
   with self.cached_session() as session:
     doc_ids = [[0, 1], [2, 3]]
     first = encoders.bow_encoder(doc_ids, 4, 3, scope='test')
     second = encoders.bow_encoder(
         doc_ids, 4, 3, scope='test', reuse=True)
     init_op = variables.global_variables_initializer()
     session.run(init_op)
     # Evaluate both encoders in a single run call and compare them.
     first_out, second_out = session.run([first, second])
     self.assertAllEqual(first_out, second_out)
Esempio n. 4
0
 def testBowEncodersSharingEmbeddings(self):
     """Two encoders in scope 'test' (the second with reuse=True) agree."""
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3]]
         first = encoders.bow_encoder(doc_ids, 4, 3, scope='test')
         second = encoders.bow_encoder(
             doc_ids, 4, 3, scope='test', reuse=True)
         init_op = variables.global_variables_initializer()
         session.run(init_op)
         # Evaluate both encoders in a single run call and compare them.
         first_out, second_out = session.run([first, second])
         self.assertAllEqual(first_out, second_out)
Esempio n. 5
0
 def testBowEncodersSharingEmbeddingsSharedScope(self):
     """Encoders built in scope 'bow' before/after reuse_variables() agree."""
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3]]
         first = encoders.bow_encoder(doc_ids, 4, 3, scope='bow')
         # Switch the current variable scope to reuse mode before building
         # the second encoder under the same scope name.
         current_scope = variable_scope.get_variable_scope()
         current_scope.reuse_variables()
         second = encoders.bow_encoder(doc_ids, 4, 3, scope='bow')
         init_op = variables.global_variables_initializer()
         session.run(init_op)
         first_out, second_out = session.run([first, second])
         self.assertAllEqual(first_out, second_out)
Esempio n. 6
0
 def testBowEncodersSharingEmbeddingsSharedScope(self):
   """Encoders built in scope 'bow' before/after reuse_variables() agree."""
   with self.cached_session() as session:
     doc_ids = [[0, 1], [2, 3]]
     first = encoders.bow_encoder(doc_ids, 4, 3, scope='bow')
     # Switch the current variable scope to reuse mode before building
     # the second encoder under the same scope name.
     current_scope = variable_scope.get_variable_scope()
     current_scope.reuse_variables()
     second = encoders.bow_encoder(doc_ids, 4, 3, scope='bow')
     init_op = variables.global_variables_initializer()
     session.run(init_op)
     first_out, second_out = session.run([first, second])
     self.assertAllEqual(first_out, second_out)
Esempio n. 7
0
 def testBowEncodersSharingEmbeddingsSharedScope(self):
     """Encoders built in scope "bow" before/after reuse_variables() agree."""
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3]]
         first = encoders.bow_encoder(doc_ids, 4, 3, scope="bow")
         # Switch the current variable scope to reuse mode before building
         # the second encoder under the same scope name.
         current_scope = tf.get_variable_scope()
         current_scope.reuse_variables()
         second = encoders.bow_encoder(doc_ids, 4, 3, scope="bow")
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         first_out, second_out = session.run([first, second])
         self.assertAllEqual(first_out, second_out)
Esempio n. 8
0
 def testBowEncodersSharingEmbeddingsInheritedScopes(self):
     """Encoders built inside 'test' and inside 'test' with reuse=True agree."""
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3]]
         # No scope is passed to bow_encoder here; only the enclosing
         # variable_scope blocks differ between the two encoders.
         with tf.variable_scope('test'):
             first = encoders.bow_encoder(doc_ids, 4, 3)
         with tf.variable_scope('test', reuse=True):
             second = encoders.bow_encoder(doc_ids, 4, 3)
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         first_out, second_out = session.run([first, second])
         self.assertAllEqual(first_out, second_out)
Esempio n. 9
0
 def testBowEncodersSharingEmbeddingsInheritedScopes(self):
   """Encoders built inside 'test' and inside 'test' with reuse=True agree."""
   with self.test_session() as session:
     doc_ids = [[0, 1], [2, 3]]
     # No scope is passed to bow_encoder here; only the enclosing
     # variable_scope blocks differ between the two encoders.
     with tf.variable_scope('test'):
       first = encoders.bow_encoder(doc_ids, 4, 3)
     with tf.variable_scope('test', reuse=True):
       second = encoders.bow_encoder(doc_ids, 4, 3)
     init_op = tf.global_variables_initializer()
     session.run(init_op)
     first_out, second_out = session.run([first, second])
     self.assertAllEqual(first_out, second_out)
Esempio n. 10
0
 def testBowEncoderSparseTensor(self):
   """Encoding sparse-converted 2x2 ids evaluates to an array of shape [2, 3]."""
   with self.cached_session() as session:
     dense_ids = [[0, 1], [2, 3]]
     sparse_ids = sparse_ops.dense_to_sparse_tensor(dense_ids)
     encoded = encoders.bow_encoder(sparse_ids, 4, 3)
     init_op = variables.global_variables_initializer()
     session.run(init_op)
     result = encoded.eval()
     self.assertAllEqual([2, 3], result.shape)
Esempio n. 11
0
 def testBowEncoderSparseTensor(self):
     """Encoding sparse-converted 2x2 ids evaluates to an array of shape [2, 3]."""
     with self.test_session() as session:
         dense_ids = [[0, 1], [2, 3]]
         sparse_ids = sparse_ops.dense_to_sparse_tensor(dense_ids)
         encoded = encoders.bow_encoder(sparse_ids, 4, 3)
         init_op = variables.global_variables_initializer()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([2, 3], result.shape)
def bag_of_words_model(features, target):
    """Build a bag-of-words classifier with 15 output classes.

    The input is first converted to a bag-of-words representation via
    ``encoders.bow_encoder``; a fully connected layer with
    ``activation_fn=None`` then produces the logits, which are trained with
    softmax cross-entropy using the Adam optimizer.

    :param features: input handed to ``encoders.bow_encoder``
        (presumably word ids -- confirm against the caller).
    :param target: class indices; one-hot encoded here with depth 15.
    :return: tuple ``(predictions, loss, train_op)`` where ``predictions``
        is a dict with keys ``'class'`` and ``'prob'``.
    """
    num_classes = 15
    one_hot_target = tf.one_hot(target, num_classes, 1, 0)
    # n_words and EMBEDDING_SIZE are module-level names defined outside
    # this function.
    encoded = encoders.bow_encoder(
        features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
    logits = tf.contrib.layers.fully_connected(
        encoded, num_classes, activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)

    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)
Esempio n. 13
0
 def testBowEncoderSparseTensor(self):
     """Encoding sparse-converted 2x2 ids evaluates to an array of shape [2, 3]."""
     with self.test_session() as session:
         dense_ids = [[0, 1], [2, 3]]
         to_sparse = tf.contrib.layers.sparse_ops.dense_to_sparse_tensor
         sparse_ids = to_sparse(dense_ids)
         encoded = encoders.bow_encoder(sparse_ids, 4, 3)
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([2, 3], result.shape)
Esempio n. 14
0
 def testBowEncoderSparseTensor(self):
     """Encoding sparse-converted 2x2 ids evaluates to an array of shape [2, 3]."""
     with self.test_session() as session:
         dense_ids = [[0, 1], [2, 3]]
         to_sparse = tf.contrib.layers.sparse_ops.dense_to_sparse_tensor
         sparse_ids = to_sparse(dense_ids)
         encoded = encoders.bow_encoder(sparse_ids, 4, 3)
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([2, 3], result.shape)
Esempio n. 15
0
 def testBowEncoderReuseEmbeddingsVariable(self):
     """An encoder reusing a pre-made "test/embeddings" variable averages its rows.

     The expected output equals, for each document, the mean of the
     embedding rows selected by its ids.
     """
     with self.test_session() as session:
         doc_ids = [[1, 1], [2, 3]]
         embedding_rows = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
         with tf.variable_scope("test"):
             embeddings = _get_const_var("embeddings", (4, 3), embedding_rows)
             self.assertEqual(embeddings.name, "test/embeddings:0")
         encoded = encoders.bow_encoder(doc_ids, 4, 3, scope="test", reuse=True)
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         expected = [[3.0, 4.0, 5.0], [7.5, 8.5, 9.5]]
         self.assertAllClose(expected, encoded.eval())
 def testBowEncoderReuseEmbeddingsVariable(self):
   """An encoder reusing a pre-made 'test/embeddings' variable averages its rows.

   The expected output equals, for each document, the mean of the
   embedding rows selected by its ids.
   """
   with self.test_session() as session:
     doc_ids = [[1, 1], [2, 3]]
     embedding_rows = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
     with variable_scope.variable_scope('test'):
       embeddings = _get_const_var('embeddings', (4, 3), embedding_rows)
       self.assertEqual(embeddings.name, 'test/embeddings:0')
     encoded = encoders.bow_encoder(doc_ids, 4, 3, scope='test', reuse=True)
     init_op = variables.global_variables_initializer()
     session.run(init_op)
     expected = [[3., 4., 5.], [7.5, 8.5, 9.5]]
     self.assertAllClose(expected, encoded.eval())
Esempio n. 17
0
 def testBowEncoderReuseEmbeddingsVariable(self):
   """An encoder reusing a pre-made 'test/embeddings' variable averages its rows.

   The expected output equals, for each document, the mean of the
   embedding rows selected by its ids.
   """
   with self.cached_session() as session:
     doc_ids = [[1, 1], [2, 3]]
     embedding_rows = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
     with variable_scope.variable_scope('test'):
       embeddings = _get_const_var('embeddings', (4, 3), embedding_rows)
       self.assertEqual(embeddings.name, 'test/embeddings:0')
     encoded = encoders.bow_encoder(doc_ids, 4, 3, scope='test', reuse=True)
     init_op = variables.global_variables_initializer()
     session.run(init_op)
     expected = [[3., 4., 5.], [7.5, 8.5, 9.5]]
     self.assertAllClose(expected, encoded.eval())
def bag_of_words_model(features, target):
  """Build a bag-of-words classifier with 15 output classes.

  Word order in the text is disregarded. The features go through
  `encoders.bow_encoder`, a fully connected layer with `activation_fn=None`
  yields the logits, and the loss is softmax cross-entropy optimized with
  Adam.

  Args:
    features: input handed to `encoders.bow_encoder` (presumably word ids;
      confirm against the caller).
    target: class indices; one-hot encoded here with depth 15.

  Returns:
    A tuple `(predictions, loss, train_op)` where `predictions` is a dict
    with keys 'class' and 'prob'.
  """
  num_classes = 15
  one_hot_target = tf.one_hot(target, num_classes, 1, 0)
  # n_words and EMBEDDING_SIZE are module-level names defined outside this
  # function.
  encoded = encoders.bow_encoder(
      features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
  logits = tf.contrib.layers.fully_connected(
      encoded, num_classes, activation_fn=None)
  loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)
  global_step = tf.contrib.framework.get_global_step()
  train_op = tf.contrib.layers.optimize_loss(
      loss, global_step, optimizer='Adam', learning_rate=0.01)
  predictions = {
      'class': tf.argmax(logits, 1),
      'prob': tf.nn.softmax(logits),
  }
  return (predictions, loss, train_op)
Esempio n. 19
0
def bag_of_words_model(features, target):
    """Build a bag-of-words classifier with 15 output classes.

    Word order in the text is disregarded. The features go through
    ``encoders.bow_encoder``, a fully connected layer yields the logits, and
    the loss is softmax cross-entropy optimized with Adam.

    :param features: input handed to ``encoders.bow_encoder``
        (presumably word ids -- confirm against the caller).
    :param target: class indices; one-hot encoded here with depth 15.
    :return: tuple ``(predictions, loss, train_op)`` where ``predictions``
        is a dict with keys ``'class'`` and ``'prob'``.
    """
    num_classes = 15
    one_hot_target = tf.one_hot(target, num_classes, 1, 0)
    encoded = encoders.bow_encoder(
        features,
        vocab_size=setting.n_words,
        embed_dim=BOW_EMBEDING_DIM)
    # NOTE(review): `activation_fn` is a bare name passed positionally; it
    # must be defined at module level or this raises NameError. The trailing
    # "#=None)" in the previous revision suggests this was once
    # `activation_fn=None` -- confirm which is intended.
    logits = tf.contrib.layers.fully_connected(
        encoded, num_classes, activation_fn)
    # Alternative considered earlier: tf.losses.softmax_cross_entropy.
    loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)
    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)
Esempio n. 20
0
 def testBowEncoderDense(self):
     """With sparse_lookup=False, four 2-id documents evaluate to shape [4, 3]."""
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3], [0, 0], [0, 0]]
         encoded = encoders.bow_encoder(doc_ids, 4, 3, sparse_lookup=False)
         init_op = variables.global_variables_initializer()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([4, 3], result.shape)
Esempio n. 21
0
 def testBowEncoderSparseTensorDenseLookup(self):
     """Sparse-converted ids with sparse_lookup=False must raise TypeError."""
     with self.test_session():
         dense_ids = [[0, 1]]
         to_sparse = tf.contrib.layers.sparse_ops.dense_to_sparse_tensor
         sparse_ids = to_sparse(dense_ids)
         # The encoder is expected to reject this input when dense lookup
         # is requested.
         with self.assertRaises(TypeError):
             encoders.bow_encoder(sparse_ids, 4, 3, sparse_lookup=False)
Esempio n. 22
0
 def testBowEncoderDense(self):
     """With sparse_lookup=False, four 2-id documents evaluate to shape [4, 3]."""
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3], [0, 0], [0, 0]]
         encoded = encoders.bow_encoder(doc_ids, 4, 3, sparse_lookup=False)
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([4, 3], result.shape)
Esempio n. 23
0
 def testBowEncoderSparseEmptyRow(self):
     """Three documents, the last being [0, 0], evaluate to shape [3, 5].

     NOTE(review): the test name suggests the all-zero row is treated as
     empty by the default lookup path -- confirm against bow_encoder.
     """
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3], [0, 0]]
         encoded = encoders.bow_encoder(doc_ids, 4, 5)
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([3, 5], result.shape)
Esempio n. 24
0
 def testBowEncoderSparse(self):
   """Encoding two 2-id documents with default options gives shape [2, 3]."""
   with self.test_session() as session:
     doc_ids = [[0, 1], [2, 3]]
     encoded = encoders.bow_encoder(doc_ids, 4, 3)
     init_op = tf.global_variables_initializer()
     session.run(init_op)
     result = encoded.eval()
     self.assertAllEqual([2, 3], result.shape)
Esempio n. 25
0
 def testBowEncoderSparse(self):
     """Encoding two 2-id documents with default options gives shape [2, 3]."""
     with self.cached_session() as session:
         doc_ids = [[0, 1], [2, 3]]
         encoded = encoders.bow_encoder(doc_ids, 4, 3)
         init_op = variables.global_variables_initializer()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([2, 3], result.shape)
Esempio n. 26
0
# Sample documents used by the demo below; each list element is one document.
# A comma was missing between the second and third literals, so Python's
# implicit string concatenation silently merged them into a single document.
raw_docs = [
    " Abbott of Farnham E D Abbott Limited was a British coachbuilding business based in Farnham Surrey trading under that name from 1929. A major part of their output was under sub-contract to motor vehicle manufacturers. Their business closed in 1972.",
    " Schwan-STABILO is a German maker of pens for writing colouring and cosmetics as well as markers and highlighters for office use. It is the world's largest manufacturer of highlighter pens Stabilo Boss.",
    " Q-workshop is a Polish company located in Poznań that specializes in designand production of polyhedral dice and dice accessories for use in various games (role-playing gamesboard games and tabletop wargames). They also run an online retail store and maintainan active forum community.Q-workshop was established in 2001 by Patryk Strzelewicz – a student from Poznań. Initiallythe company sold its products via online auction services but in 2005 a website and online store wereestablished.",
]

import tensorflow as tf
from tensorflow.contrib.layers.python.layers import encoders
if __name__ == '__main__':
    # Demo: turn raw_docs into vocabulary-id vectors and feed them through
    # bow_encoder, printing the intermediate and final values.
    tokenizer = Tokenizer()
    docs = []

    cat_vocab = CatVocabulary(50, tokenizer.tokenizer0)
    # Fit the vocabulary on the raw documents before requesting ids.
    cat_vocab.fit(raw_docs)
    # Collect one id vector per document, echoing each as it is produced.
    # (The loop variable is not called `id`, to avoid shadowing the builtin.)
    for doc_ids in cat_vocab.get_vec(raw_docs):
        print(doc_ids)
        docs.append(doc_ids)

    print(docs)
    docs = np.array(docs)
    print(docs.shape)
    with tf.Session() as sess:
        enc = encoders.bow_encoder(docs, len(cat_vocab), 80)
        sess.run(tf.global_variables_initializer())
        # Evaluate the encoder once and reuse the result for both prints.
        encoded = enc.eval()
        print(encoded)
        print(encoded.shape)
Esempio n. 27
0
 def testBowEncoderSparse(self):
     """Encoding two 2-id documents with default options gives shape [2, 3]."""
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3]]
         encoded = encoders.bow_encoder(doc_ids, 4, 3)
         init_op = tf.initialize_all_variables()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([2, 3], result.shape)
Esempio n. 28
0
 def testBowEncoderSparseEmptyRow(self):
     """Three documents, the last being [0, 0], evaluate to shape [3, 5].

     NOTE(review): the test name suggests the all-zero row is treated as
     empty by the default lookup path -- confirm against bow_encoder.
     """
     with self.test_session() as session:
         doc_ids = [[0, 1], [2, 3], [0, 0]]
         encoded = encoders.bow_encoder(doc_ids, 4, 5)
         init_op = variables.global_variables_initializer()
         session.run(init_op)
         result = encoded.eval()
         self.assertAllEqual([3, 5], result.shape)
Esempio n. 29
0

def emb_bag_of_words_model(features, target):
    """Build a 15-class classifier directly on top of ``features``.

    Unlike a plain bag-of-words model, no ``encoders.bow_encoder`` call is
    made here (it is disabled), so ``features`` is fed straight into the
    fully connected layer -- presumably it is already an embedded
    representation; confirm against the caller.

    :param features: input to the fully connected layer.
    :param target: class indices; one-hot encoded here with depth 15.
    :return: tuple ``(predictions, loss, train_op)`` where ``predictions``
        is a dict with keys ``'class'`` and ``'prob'``.
    """
    num_classes = 15
    one_hot_target = tf.one_hot(target, num_classes, 1, 0)
    # Disabled step kept for reference:
    #   features = encoders.bow_encoder(
    #       features, vocab_size=setting.n_words, embed_dim=BOW_EMBEDING_DIM)
    # NOTE(review): `activation_fn` is a bare name passed positionally; it
    # must be defined at module level or this raises NameError. The trailing
    # "#=None)" in the previous revision suggests this was once
    # `activation_fn=None` -- confirm which is intended.
    logits = tf.contrib.layers.fully_connected(
        features, num_classes, activation_fn)
    # Alternative considered earlier: tf.losses.softmax_cross_entropy.
    loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)
    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)


# test
if __name__ == '__main__':
    # Smoke check: encode two tiny documents and print the evaluated result.
    with tf.Session() as session:
        doc_ids = [[0, 1], [2, 3]]
        encoded = encoders.bow_encoder(doc_ids, 4, 3)
        init_op = tf.global_variables_initializer()
        session.run(init_op)
        # The evaluated result is expected to have shape [2, 3].
        result = encoded.eval()
        print(result)