Ejemplo n.º 1
0
 def test_invalid_conf(self):
     """Check that training rejects configs with a zeroed setting.

     One config is built per setting (epochs, window, size,
     max_vocab_size) with that setting at 0; ``train`` is expected to
     raise ``InvalidArgumentError`` for each of them.
     """
     zeroed_settings = ('epochs', 'window', 'size', 'max_vocab_size')
     bad_configs = [GensimConfig(**{name: 0}) for name in zeroed_settings]
     for bad_config in bad_configs:
         w2v = Word2VecEmbedding(gensim_conf=bad_config)
         with pytest.raises(InvalidArgumentError):
             w2v.train(self.valid_input)
Ejemplo n.º 2
0
 def test_pretrained_word2vec(self):
     """Check that a wrapper built from a pre-trained model keeps it.

     Loads ``w2v_test.model`` from ``self.embeddings_dir``, wraps it and
     verifies that the wrapped model is still in place after ``train``
     and that id lookups behave for a known and an unknown word.
     """
     model_path = os.path.join(self.embeddings_dir, 'w2v_test.model')
     model = Word2Vec.load(model_path)
     embeddings = Word2VecEmbedding(model=model)
     embeddings.train(None)  # shouldn't do anything in train
     self.assertEqual(embeddings.model, model)
     self.assertEqual(embeddings.to_id('pron'), 0)
     # Identity check: an unknown word must yield None itself, not merely
     # a value that happens to compare equal to None.
     self.assertIsNone(embeddings.to_id('invented'))
Ejemplo n.º 3
0
 def test_to_vector(self):
     """Check the length of vectors returned by both embedding types.

     A Word2Vec and a Doc2Vec embedding are trained with ``size=5``;
     ``to_vector`` must return 5 components for a single word and for a
     list of tokens respectively.
     """
     expected_len = 5
     config = GensimConfig(epochs=5, max_vocab_size=None,
                           min_count=1, size=5)
     cases = (
         (Word2VecEmbedding, 'sentence'),
         (Doc2VecEmbedding, ['my', 'sentence']),
     )
     # Each embedding is created, trained and checked before the next one
     # is built.
     for embedding_cls, query in cases:
         embedding = embedding_cls(gensim_conf=config)
         embedding.train(self.valid_input)
         self.assertEqual(len(embedding.to_vector(query)), expected_len)
Ejemplo n.º 4
0
 def setUp(self):
     """Prepare a pre-trained embedding and small train/validation sets.

     Loads ``w2v_test.model`` from ``test/data/embeddings``, wraps it in a
     ``Word2VecEmbedding``, stores the fixture data on ``self`` and resets
     the default TensorFlow graph.
     """
     model_file = os.path.join('test', 'data', 'embeddings',
                               'w2v_test.model')
     pretrained = Word2Vec.load(model_file)
     self.embeddings = Word2VecEmbedding(model=pretrained)

     # Five training rows of four ids each, with one label per row.
     self.x_train = [
         [0, 2, 1, 0],
         [2, 1, 0, 0],
         [3, 1, 0, 1],
         [2, 0, 0, 0],
         [1, 3, 3, 1],
     ]
     self.y_train = [0, 0, 1, 0, 1]

     # Two validation rows with their labels.
     self.x_val = [
         [0, 1, 2, 0],
         [2, 1, 2, 3],
     ]
     self.y_val = [0, 1]

     tf.reset_default_graph()
Ejemplo n.º 5
0
 def test_save_embeddings(self):
     """Check the files written by ``save_embeddings``.

     Trains a 2-dimensional embedding, saves it under the prefix ``emb``
     and verifies that ``emb.npy`` has shape (14, 2) and that
     ``emb.vocab`` contains 14 lines. The generated files are removed
     even when a check fails, so a failing run leaves no artifacts in the
     working directory.
     """
     conf = GensimConfig(size=2, min_count=1)
     embeddings = Word2VecEmbedding(conf)
     embeddings.train(self.valid_input)
     output_files = ('emb.npy', 'emb.vocab')
     try:
         embeddings.save_embeddings('emb')
         emb_arr = np.load('emb.npy')
         self.assertEqual(np.shape(emb_arr), (14, 2))
         with open('emb.vocab', 'r') as f:
             vocab = f.readlines()
         self.assertEqual(len(vocab), 14)
     finally:
         # Delete only what exists so that a cleanup error never masks
         # the failure that triggered it.
         for path in output_files:
             if os.path.exists(path):
                 os.remove(path)
Ejemplo n.º 6
0
 def test_to_id(self):
     """Check that 'sentence' maps to id 2 and id 2 maps back to it."""
     w2v = Word2VecEmbedding(
         gensim_conf=GensimConfig(epochs=5, max_vocab_size=None,
                                  min_count=1))
     w2v.train(self.valid_input)
     word, word_id = 'sentence', 2
     self.assertEqual(w2v.to_id(word), word_id)
     self.assertEqual(w2v.to_word(word_id), word)
Ejemplo n.º 7
0
 def test_invalid_input(self):
     """Check that training on ``self.invalid_input`` is rejected.

     ``train`` is expected to raise ``InvalidArgumentError``.
     """
     w2v = Word2VecEmbedding(GensimConfig(min_count=1))
     with pytest.raises(InvalidArgumentError):
         w2v.train(self.invalid_input)