Example #1
0
 def test_corpus_stop_list_entropy(self):
     """Check the stoplist built from the test corpus with basis='entropy'.

     Requests 10 words without values and compares the result against
     the expected word list.
     """
     expected_words = [
         'ac', 'ad', 'atque', 'cum', 'et',
         'in', 'mihi', 'qui', 'rerum', 'vel',
     ]
     builder = LatinCorpusStoplist()
     built = builder.build_stoplist(
         self.test_corpus,
         size=10,
         basis='entropy',
         inc_values=False,
     )
     self.assertEqual(built, expected_words)
Example #2
0
    def test_corpus_stop_list_freq_sort_words(self):
        """Check the basis='frequency' stoplist when sort_words=False.

        The expected words below are deliberately not in alphabetical
        order; the result must match this exact sequence.
        """
        expected_words = [
            'in', 'et', 'vel', 'ac', 'cum',
            'qui', 'atque', 'mihi', 'ad', 'neque',
        ]
        builder = LatinCorpusStoplist()
        built = builder.build_stoplist(
            self.test_corpus,
            size=10,
            basis='frequency',
            inc_values=False,
            sort_words=False,
        )
        self.assertEqual(built, expected_words)
Example #3
0
    def test_corpus_latin(self):
        """Check the Latin stoplist built from latin_test_corpus with basis='zou'.

        Requests 10 words without values and compares the result against
        the expected word list.
        """
        expected_words = [
            'ac', 'atque', 'cum', 'et', 'in',
            'mihi', 'neque', 'qui', 'rerum', 'vel',
        ]
        builder = LatinCorpusStoplist()
        built = builder.build_stoplist(
            self.latin_test_corpus,
            size=10,
            basis='zou',
            inc_values=False,
        )
        self.assertEqual(built, expected_words)
Example #4
0
 def test_corpus_stop_list_freq_inc_values(self):
     """Check the basis='frequency' stoplist when inc_values=True.

     With values included, each expected entry is a (word, value) tuple
     rather than a bare word.
     """
     expected_pairs = [
         ('ac', 8),
         ('ad', 5),
         ('atque', 6),
         ('cum', 8),
         ('et', 15),
         ('in', 18),
         ('mihi', 6),
         ('neque', 5),
         ('qui', 7),
         ('vel', 9),
     ]
     builder = LatinCorpusStoplist()
     built = builder.build_stoplist(
         self.test_corpus,
         size=10,
         basis='frequency',
         inc_values=True,
     )
     self.assertEqual(built, expected_pairs)
Example #5
0
 def test_corpus_stop_list_variance(self):
     """Check the stoplist built from the test corpus with basis='variance'.

     Requests 10 words and compares the result against the expected
     word list.
     """
     expected_words = ['ac', 'atque', 'cum', 'et', 'in',
                       'mihi', 'neque', 'qui', 'rerum', 'vel']
     builder = LatinCorpusStoplist()
     built = builder.build_stoplist(
         self.test_corpus, size=10, basis='variance')
     self.assertEqual(built, expected_words)
Example #6
0
 def test_corpus_stop_list_freq_include(self):
     """Check the basis='frequency' stoplist when include=['est'] is passed.

     The expected list contains 'est' and has eleven entries even though
     size=10 is requested.
     """
     expected_words = ['ac', 'ad', 'atque', 'cum', 'est', 'et',
                       'in', 'mihi', 'neque', 'qui', 'vel']
     builder = LatinCorpusStoplist()
     built = builder.build_stoplist(
         self.test_corpus, size=10, basis='frequency', include=['est'])
     self.assertEqual(built, expected_words)