Example #1
0
 def test_expand_model(self, n=10):
     """Check that `_expand_from` grows `syn0` by `n` rows and that
     'SENT_0' shows up in `index2word` afterwards."""
     doc_model = Document2Vec(w2v_file)
     sentences = _generate_corpus(doc_model, n=n)
     rows_before = doc_model.syn0.shape[0]
     doc_model._expand_from(sentences)
     rows_after = doc_model.syn0.shape[0]
     self.assertEqual(rows_before + n, rows_after)
     self.assertIn('SENT_0', doc_model.index2word)
Example #2
0
 def test_checkpoint(self):
     """Check that `_reset_to_checkpoint` undoes a change made to `syn0`
     after `_build_checkpoint`, using the sum of `syn0` as a checksum."""
     doc_model = Document2Vec(w2v_file)
     original_sum = doc_model.syn0.sum()
     doc_model._build_checkpoint()
     # Scale the weights so the checksum moves away from the original.
     doc_model.syn0 *= 2.0
     self.assertNotEqual(doc_model.syn0.sum(), original_sum)
     # Restoring the checkpoint must bring the checksum back.
     doc_model._reset_to_checkpoint()
     self.assertEqual(doc_model.syn0.sum(), original_sum)
Example #3
0
 def test_transform(self):
     """Check that the first row returned by `transform` scores above
     0.15 under `cosine` against the vector of the first word of the
     first corpus entry."""
     # NOTE(review): `cosine` is defined outside this block; confirm it
     # returns a similarity (not a distance) so the threshold direction
     # is the intended one.
     doc_model = Document2Vec(w2v_file)
     doc_model.workers = 1
     sentences = _generate_corpus(doc_model)
     doc_vectors = doc_model.transform(sentences)
     # Get the first word of the first entry in the corpus.
     first_entry = next(iter(sentences))
     first_word = first_entry.words[0]
     score = cosine(doc_vectors[0, :], doc_model[first_word])
     self.assertGreater(score, 0.15)
Example #4
0
 def test_labeledlinesentence(self):
     """Write a generated corpus to a text file, reload it through
     `LabeledLineSentence`, and check that after `fit_transform` the
     'SENT_0' label scores above 0.15 similarity with the first word of
     the first reloaded entry.

     The corpus goes to a uniquely named temporary file that is removed
     when the test finishes, instead of a fixed path under /tmp, so
     concurrent runs cannot overwrite each other's data and no file is
     left behind.
     """
     import os
     import tempfile

     model = Document2Vec(w2v_file)
     model.workers = 1
     corpus = _generate_corpus(model)
     with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh:
         fn = fh.name
         # Register the removal right away so the file is cleaned up
         # even when a later step of the test fails.
         self.addCleanup(os.remove, fn)
         for line in corpus:
             text = ' '.join(line.words)
             try:
                 fh.write(text + '\n')
             except UnicodeError:
                 # Best effort: skip lines that cannot be encoded, but
                 # let any other failure (e.g. I/O errors) surface.
                 continue
     corpus = LabeledLineSentence(fn)
     model.fit_transform(corpus)
     # Get the first word in the corpus
     word = next(iter(corpus)).words[0]
     sim = model.similarity('SENT_0', word)
     self.assertGreater(sim, 0.15)
Example #5
0
 def test_word_similarity(self):
     """Check that `similarity` scores 'blue' against 'gold' above 0.3."""
     score = Document2Vec(w2v_file).similarity('blue', 'gold')
     self.assertGreater(score, 0.3)
Example #6
0
 def test_get_vector(self):
     """Check that `get_vector('the')` returns exactly an `np.ndarray`."""
     vector = Document2Vec(w2v_file).get_vector('the')
     # Exact type identity: an ndarray subclass would not pass here.
     self.assertIs(type(vector), np.ndarray)
Example #7
0
 def test_load_from_w2v(self):
     """Check that constructing `Document2Vec` from `w2v_file` yields a
     `Document2Vec` instance whose `index2word` contains 'jacket'."""
     model = Document2Vec(w2v_file)
     # Assert on the instance itself: type(model) is never None, so a
     # None-check on the type could not fail.
     self.assertIsNotNone(model)
     self.assertIs(type(model), Document2Vec)
     self.assertIn('jacket', model.index2word)
Example #8
0
 def test_init(self):
     """Check that a `Document2Vec` built with no arguments lists
     `train_lbls` among its attributes."""
     m = Document2Vec()
     # assertIn rather than a bare assert: it still runs under
     # `python -O` and reports the searched names when it fails.
     self.assertIn('train_lbls', dir(m))