def testBowEncoderSparseTensorDenseLookup(self):
    """A SparseTensor input with sparse_lookup=False must raise TypeError."""
    with self.test_session():
        dense_ids = [[0, 1]]
        sparse_ids = tf.contrib.layers.sparse_ops.dense_to_sparse_tensor(
            dense_ids)
        # The failure is expected while building the encoder, so nothing is
        # initialized or run in the session.
        with self.assertRaises(TypeError):
            encoders.bow_encoder(sparse_ids, 4, 3, sparse_lookup=False)
def testBowEncodersSharingEmbeddings(self):
    """Two encoders built in scope "test" with reuse=True give equal output."""
    with self.test_session() as sess:
        docs = [[0, 1], [2, 3]]
        enc_1 = encoders.bow_encoder(docs, 4, 3, scope="test")
        enc_2 = encoders.bow_encoder(docs, 4, 3, scope="test", reuse=True)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        avg_1, avg_2 = sess.run([enc_1, enc_2])
        self.assertAllEqual(avg_1, avg_2)
def testBowEncodersSharingEmbeddings(self):
    """Encoders created in scope 'test' with reuse=True produce equal values."""
    with self.cached_session() as session:
        token_ids = [[0, 1], [2, 3]]
        first_encoder = encoders.bow_encoder(token_ids, 4, 3, scope='test')
        second_encoder = encoders.bow_encoder(
            token_ids, 4, 3, scope='test', reuse=True)
        session.run(variables.global_variables_initializer())
        # Fetch both outputs in a single run call before comparing them.
        first_avg, second_avg = session.run([first_encoder, second_encoder])
        self.assertAllEqual(first_avg, second_avg)
def testBowEncodersSharingEmbeddings(self):
    """Encoders created in scope 'test' with reuse=True produce equal values."""
    with self.test_session() as session:
        token_ids = [[0, 1], [2, 3]]
        first_encoder = encoders.bow_encoder(token_ids, 4, 3, scope='test')
        second_encoder = encoders.bow_encoder(
            token_ids, 4, 3, scope='test', reuse=True)
        session.run(variables.global_variables_initializer())
        # Fetch both outputs in a single run call before comparing them.
        first_avg, second_avg = session.run([first_encoder, second_encoder])
        self.assertAllEqual(first_avg, second_avg)
def testBowEncodersSharingEmbeddingsSharedScope(self):
    """Encoders share output once the current scope is switched to reuse."""
    with self.test_session() as session:
        token_ids = [[0, 1], [2, 3]]
        first_encoder = encoders.bow_encoder(token_ids, 4, 3, scope='bow')
        # Turn on reuse for the current variable scope before building the
        # second encoder under the same 'bow' scope name.
        current_scope = variable_scope.get_variable_scope()
        current_scope.reuse_variables()
        second_encoder = encoders.bow_encoder(token_ids, 4, 3, scope='bow')
        session.run(variables.global_variables_initializer())
        first_avg, second_avg = session.run([first_encoder, second_encoder])
        self.assertAllEqual(first_avg, second_avg)
def testBowEncodersSharingEmbeddingsSharedScope(self):
    """Encoders share output once the current scope is switched to reuse."""
    with self.cached_session() as session:
        token_ids = [[0, 1], [2, 3]]
        first_encoder = encoders.bow_encoder(token_ids, 4, 3, scope='bow')
        # Turn on reuse for the current variable scope before building the
        # second encoder under the same 'bow' scope name.
        current_scope = variable_scope.get_variable_scope()
        current_scope.reuse_variables()
        second_encoder = encoders.bow_encoder(token_ids, 4, 3, scope='bow')
        session.run(variables.global_variables_initializer())
        first_avg, second_avg = session.run([first_encoder, second_encoder])
        self.assertAllEqual(first_avg, second_avg)
def testBowEncodersSharingEmbeddingsSharedScope(self):
    """Encoders in scope "bow" give equal output after reuse_variables()."""
    with self.test_session() as sess:
        docs = [[0, 1], [2, 3]]
        enc_1 = encoders.bow_encoder(docs, 4, 3, scope="bow")
        # Switch the current variable scope to reuse mode before building the
        # second encoder under the same scope name.
        tf.get_variable_scope().reuse_variables()
        enc_2 = encoders.bow_encoder(docs, 4, 3, scope="bow")
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        avg_1, avg_2 = sess.run([enc_1, enc_2])
        self.assertAllEqual(avg_1, avg_2)
def testBowEncodersSharingEmbeddingsInheritedScopes(self):
    """Encoders built inside 'test' scopes (second with reuse) match."""
    with self.test_session() as sess:
        docs = [[0, 1], [2, 3]]
        with tf.variable_scope('test'):
            enc_1 = encoders.bow_encoder(docs, 4, 3)
        # Re-enter the same scope name with reuse=True; the encoder itself is
        # given no scope or reuse argument here.
        with tf.variable_scope('test', reuse=True):
            enc_2 = encoders.bow_encoder(docs, 4, 3)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        avg_1, avg_2 = sess.run([enc_1, enc_2])
        self.assertAllEqual(avg_1, avg_2)
def testBowEncodersSharingEmbeddingsInheritedScopes(self):
    """Encoders built inside 'test' scopes (second with reuse) match."""
    with self.test_session() as session:
        token_ids = [[0, 1], [2, 3]]
        with tf.variable_scope('test'):
            first_encoder = encoders.bow_encoder(token_ids, 4, 3)
        # Same scope name again, this time opened with reuse=True; the encoder
        # call itself carries no scope or reuse argument.
        with tf.variable_scope('test', reuse=True):
            second_encoder = encoders.bow_encoder(token_ids, 4, 3)
        session.run(tf.global_variables_initializer())
        first_avg, second_avg = session.run([first_encoder, second_encoder])
        self.assertAllEqual(first_avg, second_avg)
def testBowEncoderSparseTensor(self):
    """Encoding a sparse version of a 2x2 id batch yields shape [2, 3]."""
    with self.cached_session() as session:
        dense_ids = [[0, 1], [2, 3]]
        sparse_ids = sparse_ops.dense_to_sparse_tensor(dense_ids)
        encoder_out = encoders.bow_encoder(sparse_ids, 4, 3)
        session.run(variables.global_variables_initializer())
        # eval() uses the default session opened by the surrounding `with`.
        encoded = encoder_out.eval()
        self.assertAllEqual([2, 3], encoded.shape)
def testBowEncoderSparseTensor(self):
    """Encoding a sparse version of a 2x2 id batch yields shape [2, 3]."""
    with self.test_session() as session:
        dense_ids = [[0, 1], [2, 3]]
        sparse_ids = sparse_ops.dense_to_sparse_tensor(dense_ids)
        encoder_out = encoders.bow_encoder(sparse_ids, 4, 3)
        session.run(variables.global_variables_initializer())
        # eval() uses the default session opened by the surrounding `with`.
        encoded = encoder_out.eval()
        self.assertAllEqual([2, 3], encoded.shape)
def bag_of_words_model(features, target):
    """Build a classifier that first converts the input to a bag-of-words.

    :param features: ids passed to ``encoders.bow_encoder``.
    :param target: labels, one-hot encoded here with depth 15.
    :return: tuple ``(predictions, loss, train_op)`` where ``predictions``
        is a dict with keys ``'class'`` and ``'prob'``.
    """
    num_classes = 15
    one_hot_target = tf.one_hot(target, num_classes, 1, 0)
    encoded = encoders.bow_encoder(
        features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
    # Linear layer (no activation) producing one logit per class.
    logits = tf.contrib.layers.fully_connected(
        encoded, num_classes, activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)
    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)
def testBowEncoderSparseTensor(self):
    """Encoding a sparse version of a 2x2 id batch yields shape [2, 3]."""
    with self.test_session() as sess:
        docs = [[0, 1], [2, 3]]
        sparse_docs = tf.contrib.layers.sparse_ops.dense_to_sparse_tensor(docs)
        enc = encoders.bow_encoder(sparse_docs, 4, 3)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        self.assertAllEqual([2, 3], enc.eval().shape)
def testBowEncoderSparseTensor(self):
    """Encoding a sparse version of a 2x2 id batch yields shape [2, 3]."""
    with self.test_session() as sess:
        docs = [[0, 1], [2, 3]]
        sparse_docs = tf.contrib.layers.sparse_ops.dense_to_sparse_tensor(
            docs)
        enc = encoders.bow_encoder(sparse_docs, 4, 3)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        self.assertAllEqual([2, 3], enc.eval().shape)
def testBowEncoderReuseEmbeddingsVariable(self):
    """An encoder with reuse=True reads the existing "test/embeddings" table.

    The expected rows are the means of the constant-table rows picked by
    each document: rows (1, 1) -> [3, 4, 5], rows (2, 3) -> [7.5, 8.5, 9.5].
    """
    with self.test_session() as sess:
        docs = [[1, 1], [2, 3]]
        with tf.variable_scope("test"):
            v = _get_const_var("embeddings", (4, 3),
                               [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]])
            self.assertEqual(v.name, "test/embeddings:0")
        enc = encoders.bow_encoder(docs, 4, 3, scope="test", reuse=True)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        self.assertAllClose([[3.0, 4.0, 5.0], [7.5, 8.5, 9.5]], enc.eval())
def testBowEncoderReuseEmbeddingsVariable(self):
    """An encoder with reuse=True reads the existing 'test/embeddings' table.

    The expected rows are the means of the constant-table rows picked by
    each document: rows (1, 1) -> [3, 4, 5], rows (2, 3) -> [7.5, 8.5, 9.5].
    """
    with self.test_session() as session:
        token_ids = [[1, 1], [2, 3]]
        table_values = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
        with variable_scope.variable_scope('test'):
            embeddings = _get_const_var('embeddings', (4, 3), table_values)
            self.assertEqual(embeddings.name, 'test/embeddings:0')
        encoder_out = encoders.bow_encoder(
            token_ids, 4, 3, scope='test', reuse=True)
        session.run(variables.global_variables_initializer())
        expected = [[3., 4., 5.], [7.5, 8.5, 9.5]]
        self.assertAllClose(expected, encoder_out.eval())
def testBowEncoderReuseEmbeddingsVariable(self):
    """An encoder with reuse=True reads the existing 'test/embeddings' table.

    The expected rows are the means of the constant-table rows picked by
    each document: rows (1, 1) -> [3, 4, 5], rows (2, 3) -> [7.5, 8.5, 9.5].
    """
    with self.cached_session() as session:
        token_ids = [[1, 1], [2, 3]]
        table_values = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
        with variable_scope.variable_scope('test'):
            embeddings = _get_const_var('embeddings', (4, 3), table_values)
            self.assertEqual(embeddings.name, 'test/embeddings:0')
        encoder_out = encoders.bow_encoder(
            token_ids, 4, 3, scope='test', reuse=True)
        session.run(variables.global_variables_initializer())
        expected = [[3., 4., 5.], [7.5, 8.5, 9.5]]
        self.assertAllClose(expected, encoder_out.eval())
def bag_of_words_model(features, target):
    """Bag-of-words classifier; word order in the text is disregarded.

    Returns a tuple ``(predictions, loss, train_op)`` where ``predictions``
    is a dict with keys ``'class'`` and ``'prob'``.
    """
    num_classes = 15
    one_hot_target = tf.one_hot(target, num_classes, 1, 0)
    encoded = encoders.bow_encoder(
        features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE)
    # Linear layer (no activation) producing one logit per class.
    logits = tf.contrib.layers.fully_connected(
        encoded, num_classes, activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)
    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)
def bag_of_words_model(features, target):
    """Bag-of-words classifier; word order in the text is disregarded.

    Returns a tuple ``(predictions, loss, train_op)`` where ``predictions``
    is a dict with keys ``'class'`` and ``'prob'``.
    """
    num_classes = 15
    one_hot_target = tf.one_hot(target, num_classes, 1, 0)
    encoded = encoders.bow_encoder(
        features, vocab_size=setting.n_words, embed_dim=BOW_EMBEDING_DIM)
    # NOTE(review): `activation_fn` is a name resolved outside this function
    # and passed positionally (an earlier revision used `=None`) - confirm it
    # is defined at module level.
    logits = tf.contrib.layers.fully_connected(
        encoded, num_classes, activation_fn)
    # Alternative kept from the original: tf.losses.softmax_cross_entropy.
    loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)
    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)
def testBowEncoderDense(self):
    """With sparse_lookup=False, a 4x2 id batch encodes to shape [4, 3]."""
    with self.test_session() as session:
        token_ids = [[0, 1], [2, 3], [0, 0], [0, 0]]
        encoder_out = encoders.bow_encoder(
            token_ids, 4, 3, sparse_lookup=False)
        session.run(variables.global_variables_initializer())
        encoded = encoder_out.eval()
        self.assertAllEqual([4, 3], encoded.shape)
def testBowEncoderSparseTensorDenseLookup(self):
    """A SparseTensor input with sparse_lookup=False must raise TypeError."""
    with self.test_session():
        dense_ids = [[0, 1]]
        sparse_ids = tf.contrib.layers.sparse_ops.dense_to_sparse_tensor(
            dense_ids)
        # The failure is expected while building the encoder, so nothing is
        # initialized or run in the session.
        with self.assertRaises(TypeError):
            encoders.bow_encoder(sparse_ids, 4, 3, sparse_lookup=False)
def testBowEncoderDense(self):
    """With sparse_lookup=False, a 4x2 id batch encodes to shape [4, 3]."""
    with self.test_session() as sess:
        docs = [[0, 1], [2, 3], [0, 0], [0, 0]]
        enc = encoders.bow_encoder(docs, 4, 3, sparse_lookup=False)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        self.assertAllEqual([4, 3], enc.eval().shape)
def testBowEncoderSparseEmptyRow(self):
    """A 3x2 id batch whose last row is all zeros encodes to shape [3, 5]."""
    with self.test_session() as sess:
        docs = [[0, 1], [2, 3], [0, 0]]
        enc = encoders.bow_encoder(docs, 4, 5)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        self.assertAllEqual([3, 5], enc.eval().shape)
def testBowEncoderSparse(self):
    """With default arguments, a 2x2 id batch encodes to shape [2, 3]."""
    with self.test_session() as session:
        token_ids = [[0, 1], [2, 3]]
        encoder_out = encoders.bow_encoder(token_ids, 4, 3)
        session.run(tf.global_variables_initializer())
        encoded = encoder_out.eval()
        self.assertAllEqual([2, 3], encoded.shape)
def testBowEncoderSparse(self):
    """With default arguments, a 2x2 id batch encodes to shape [2, 3]."""
    with self.cached_session() as session:
        token_ids = [[0, 1], [2, 3]]
        encoder_out = encoders.bow_encoder(token_ids, 4, 3)
        session.run(variables.global_variables_initializer())
        encoded = encoder_out.eval()
        self.assertAllEqual([2, 3], encoded.shape)
# Sample documents fed to the vocabulary and the bag-of-words encoder below.
# Each entry is one document; the separating commas matter, because adjacent
# string literals without a comma are silently concatenated into one entry.
raw_docs = [
    " Abbott of Farnham E D Abbott Limited was a British coachbuilding business based in Farnham Surrey trading under that name from 1929. A major part of their output was under sub-contract to motor vehicle manufacturers. Their business closed in 1972.",
    " Schwan-STABILO is a German maker of pens for writing colouring and cosmetics as well as markers and highlighters for office use. It is the world's largest manufacturer of highlighter pens Stabilo Boss.",
    " Q-workshop is a Polish company located in Poznań that specializes in designand production of polyhedral dice and dice accessories for use in various games (role-playing gamesboard games and tabletop wargames). They also run an online retail store and maintainan active forum community.Q-workshop was established in 2001 by Patryk Strzelewicz – a student from Poznań. Initiallythe company sold its products via online auction services but in 2005 a website and online store wereestablished.",
]

import tensorflow as tf
from tensorflow.contrib.layers.python.layers import encoders

if __name__ == '__main__':
    tokenizer = Tokenizer()
    # NOTE(review): the first argument (50) is presumably a maximum document
    # length - confirm against CatVocabulary.
    cat_vocab = CatVocabulary(50, tokenizer.tokenizer0)
    # Fit the vocabulary on the sample documents.
    cat_vocab.fit(raw_docs)
    # Collect the id vector of every document, echoing each one as it arrives.
    docs = []
    for doc_ids in cat_vocab.get_vec(raw_docs):
        print(doc_ids)
        docs.append(doc_ids)
    print(docs)
    docs = np.array(list(docs))
    print(docs.shape)
    with tf.Session() as sess:
        enc = encoders.bow_encoder(docs, len(cat_vocab), 80)
        sess.run(tf.global_variables_initializer())
        # Evaluate once and reuse the result for both printouts.
        encoded = enc.eval()
        print(encoded)
        print(encoded.shape)
def testBowEncoderSparse(self):
    """With default arguments, a 2x2 id batch encodes to shape [2, 3]."""
    with self.test_session() as sess:
        docs = [[0, 1], [2, 3]]
        enc = encoders.bow_encoder(docs, 4, 3)
        # tf.initialize_all_variables() is deprecated; this is its documented
        # replacement.
        sess.run(tf.global_variables_initializer())
        self.assertAllEqual([2, 3], enc.eval().shape)
def testBowEncoderSparseEmptyRow(self):
    """A 3x2 id batch whose last row is all zeros encodes to shape [3, 5]."""
    with self.test_session() as session:
        token_ids = [[0, 1], [2, 3], [0, 0]]
        encoder_out = encoders.bow_encoder(token_ids, 4, 5)
        session.run(variables.global_variables_initializer())
        encoded = encoder_out.eval()
        self.assertAllEqual([3, 5], encoded.shape)
def emb_bag_of_words_model(features, target):
    """Classifier head applied directly to ``features``.

    Unlike a variant that calls ``encoders.bow_encoder`` first, the encoder
    step is commented out here, so ``features`` goes straight into the
    fully connected layer.

    Returns a tuple ``(predictions, loss, train_op)`` where ``predictions``
    is a dict with keys ``'class'`` and ``'prob'``.
    """
    num_classes = 15
    one_hot_target = tf.one_hot(target, num_classes, 1, 0)
    ## features = encoders.bow_encoder(
    ##     features, vocab_size=setting.n_words, embed_dim=BOW_EMBEDING_DIM)
    # NOTE(review): `activation_fn` is a name resolved outside this function
    # and passed positionally (an earlier revision used `=None`) - confirm it
    # is defined at module level.
    logits = tf.contrib.layers.fully_connected(
        features, num_classes, activation_fn)
    # Alternative kept from the original: tf.losses.softmax_cross_entropy.
    loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)
    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)


# test
if __name__ == '__main__':
    with tf.Session() as session:
        token_ids = [[0, 1], [2, 3]]
        encoder_out = encoders.bow_encoder(token_ids, 4, 3)
        session.run(tf.global_variables_initializer())
        # The unit-test version asserts that the result has shape [2, 3];
        # this script only prints the encoded values.
        encoded = encoder_out.eval()
        print(encoded)