Example #1
0
    def test_most_similar_simple(self):
        """Check the shape of the most_similar result on a one-document corpus.

        Embeddings are built with word2vec (min_count=1, seed=1) from the
        tokenized text "one one one"; querying for "one" is expected to
        return a result of shape (1,).
        """
        tokens = preprocessing.tokenize(pd.Series(["one one one"]))
        embeddings = representation.word2vec(tokens, min_count=1, seed=1)

        neighbours = representation.most_similar(embeddings, "one")

        self.assertEqual(neighbours.shape, (1,))
Example #2
0
 def test_incorrect_index_most_similar(self):
     """The most_similar result must not carry a default (unlabelled) index."""
     embeddings = pd.DataFrame([[1.0], [2.0]], index=["word1", "word2"])
     similar = representation.most_similar(embeddings, "word1")
     # index=None lets pandas assign its default index, which differs
     # from the word labels used in the embeddings frame.
     default_indexed = pd.DataFrame(embeddings.values, index=None)
     index_matches = similar.index.equals(default_indexed.index)
     self.assertFalse(index_matches)
Example #3
0
 def test_correct_index_most_similar(self):
     """The most_similar result must keep the index of the embeddings frame."""
     embeddings = pd.DataFrame([[1.0], [2.0]], index=["word1", "word2"])
     similar = representation.most_similar(embeddings, "word1")
     # Rebuild a frame from the same values and labels to obtain the
     # reference index the result is compared against.
     same_indexed = pd.DataFrame(embeddings.values, index=embeddings.index)
     index_matches = similar.index.equals(same_indexed.index)
     self.assertTrue(index_matches)
Example #4
0
 def test_most_similar_raise_with_not_in_index(self):
     """Querying a word absent from the index raises a ValueError about the index."""
     embeddings = pd.DataFrame(data=[1], index=["one"])
     missing_word = "two"
     # Callable form of assertRaisesRegex: the call is made by unittest
     # and must raise ValueError with a message matching "index".
     self.assertRaisesRegex(
         ValueError,
         r"index",
         representation.most_similar,
         embeddings,
         missing_word,
     )
Example #5
0
    def test_most_similar_raise_with_series(self):
        """Passing a Series instead of a DataFrame raises a ValueError.

        The error message is expected to mention "Pandas" or "pandas".
        """
        series_embedding = pd.Series({"one": 1})
        word = "one"

        # Callable form of assertRaisesRegex: unittest performs the call
        # and verifies both the exception type and its message.
        self.assertRaisesRegex(
            ValueError,
            r"Pandas|pandas",
            representation.most_similar,
            series_embedding,
            word,
        )