def test_similar_by_sentence(self):
     """Query a trained Average model with a tokenized sentence.

     The model is trained on the CORPUS line document and is itself passed
     as ``model``; the first field of the top hit is expected to be 4.
     """
     corpus = IndexedLineDocument(CORPUS)
     model = Average(W2V)
     model.train(corpus)
     query_tokens = ["the", "product", "is", "good"]
     hits = model.sv.similar_by_sentence(sentence=query_tokens, model=model)
     self.assertEqual(4, hits[0][0])
# Example #2
# 0
 def test_similar_by_sentence_wrong_model(self):
     """Expect RuntimeError when ``model`` is not the trained sentence model."""
     corpus = IndexedLineDocument(CORPUS)
     model = Average(W2V)
     model.train(corpus)
     query_tokens = ["the", "product", "is", "good"]
     with self.assertRaises(RuntimeError):
         # W2V (the object the Average model was built from) is handed in
         # instead of the trained model itself.
         model.sv.similar_by_sentence(sentence=query_tokens, model=W2V)
 def test_similar_by_word(self):
     """Look up sentences similar to the word "the", with and without an indexable.

     Without ``indexable`` the value 96 is expected in field 0 of the top
     hit; with the corpus passed as ``indexable`` it is expected in field 1.
     """
     corpus = IndexedLineDocument(CORPUS)
     model = Average(W2V)
     model.train(corpus)

     plain_hits = model.sv.similar_by_word(word="the", wv=model.wv)
     self.assertEqual(96, plain_hits[0][0])

     indexed_hits = model.sv.similar_by_word(
         word="the", wv=model.wv, indexable=corpus
     )
     self.assertEqual(96, indexed_hits[0][1])
 def test_most_similar_wrong_indexable(self):
     """Expect RuntimeError when a plain function is passed as ``indexable``."""

     def not_an_indexable(self):
         # Deliberately empty: only serves as an unsuitable ``indexable`` argument.
         return None

     corpus = IndexedLineDocument(CORPUS)
     model = Average(W2V)
     model.train(corpus)
     with self.assertRaises(RuntimeError):
         model.sv.most_similar(positive=0, indexable=not_an_indexable)
 def test_most_similar_vecs(self):
     """Query most_similar with the vectors fetched for indices 0 and 1.

     The first field of the top two hits is expected to be 1, then 0.
     """
     corpus = IndexedLineDocument(CORPUS)
     model = Average(W2V)
     model.train(corpus)
     model.sv.init_sims()
     query_vectors = model.sv[[0, 1]]
     hits = model.sv.most_similar(positive=query_vectors)
     # Checked in rank order so a failure surfaces at the same hit as before.
     for rank, expected in enumerate((1, 0)):
         self.assertEqual(expected, hits[rank][0])
 def test_most_similar_vec(self):
     """Query most_similar with the single vector obtained for index 0.

     The vector is fetched with ``use_norm=True`` after ``init_sims()``.
     Hits at positions 1 and 2 are expected to start with 45 and 35.
     """
     corpus = IndexedLineDocument(CORPUS)
     model = Average(W2V)
     model.train(corpus)
     model.sv.init_sims()
     query_vector = model.sv.get_vector(0, use_norm=True)
     hits = model.sv.most_similar(positive=query_vector)
     # Position 0 is skipped: the result includes sentence 0 itself.
     for rank, expected in ((1, 45), (2, 35)):
         self.assertEqual(expected, hits[rank][0])
    def test_most_similar(self):
        """Exercise most_similar for index 0 with no, line-based and list-based indexables.

        Without ``indexable`` the top two hits are expected to start with 45
        and 35. With the line document the top hit starts with the sentence
        text; with the IndexedList it is the token list, nested one level deeper.
        """
        token_lists = IndexedList(SENTENCES)
        corpus = IndexedLineDocument(CORPUS)
        model = Average(W2V)
        model.train(corpus)

        hits = model.sv.most_similar(positive=0)
        for rank, expected in enumerate((45, 35)):
            self.assertEqual(expected, hits[rank][0])

        expected_text = "Looks good and fits snug"
        hits = model.sv.most_similar(positive=0, indexable=corpus)
        self.assertEqual(expected_text, hits[0][0])

        hits = model.sv.most_similar(positive=0, indexable=token_lists)
        self.assertEqual(expected_text.split(), hits[0][0][0])
    def test_most_similar_restrict_size_tuple(self):
        """Check most_similar when ``restrict_size`` is the tuple (5, 25).

        With ``topn=20`` the query for index 20 is expected to return 19 hits
        and the query for index 1 to return 20 hits.
        NOTE(review): presumably index 20 falls inside the restricted range
        and is dropped from its own results -- confirm against the library.
        """
        corpus = IndexedLineDocument(CORPUS)
        model = Average(W2V)
        model.train(corpus)
        window = (5, 25)

        hits = model.sv.most_similar(positive=20, topn=20, restrict_size=window)
        self.assertEqual(19, len(hits))
        self.assertEqual(22, hits[0][0])

        hits = model.sv.most_similar(positive=1, topn=20, restrict_size=window)
        self.assertEqual(20, len(hits))
        self.assertEqual(9, hits[0][0])

        # With an indexable, the value 9 is expected in field 1 instead of field 0.
        hits = model.sv.most_similar(
            positive=1, topn=20, restrict_size=window, indexable=corpus
        )
        self.assertEqual(20, len(hits))
        self.assertEqual(9, hits[0][1])
 def test_similar_by_vector(self):
     """Query similar_by_vector with the word vector of "the"; the top hit should start with 96."""
     corpus = IndexedLineDocument(CORPUS)
     model = Average(W2V)
     model.train(corpus)
     word_vector = model.wv["the"]
     hits = model.sv.similar_by_vector(word_vector)
     self.assertEqual(96, hits[0][0])
 def test_most_similar_restrict_size(self):
     """With an integer ``restrict_size`` of 5 and ``topn=20``, expect exactly 5 hits."""
     corpus = IndexedLineDocument(CORPUS)
     model = Average(W2V)
     model.train(corpus)
     hits = model.sv.most_similar(positive=20, topn=20, restrict_size=5)
     hit_count = len(hits)
     self.assertEqual(5, hit_count)
 def setUp(self):
     """Store the sample sentences path in ``self.p`` and wrap it as ``self.doc``."""
     sample_path = "fse/test/test_data/test_sentences.txt"
     self.p = sample_path
     self.doc = IndexedLineDocument(sample_path)
# Example #12
# 0
 def setUp(self):
     self.sentences = IndexedLineDocument(CORPUS)
     self.model = SIF(W2V, lang_freq="en")