def test_corpus_stop_list_entropy(self):
    """Check the ten-word stoplist built from the test corpus with basis='entropy'."""
    builder = LatinCorpusStoplist()
    observed = builder.build_stoplist(
        self.test_corpus,
        size=10,
        basis='entropy',
        inc_values=False,
    )
    expected = [
        'ac', 'ad', 'atque', 'cum', 'et',
        'in', 'mihi', 'qui', 'rerum', 'vel',
    ]
    self.assertEqual(observed, expected)
def test_corpus_stop_list_freq_sort_words(self):
    """Check the basis='frequency' stoplist when sort_words=False is requested."""
    # The expected words are deliberately not in alphabetical order;
    # presumably this is the ranking order produced by the frequency basis.
    expected = [
        'in', 'et', 'vel', 'ac', 'cum',
        'qui', 'atque', 'mihi', 'ad', 'neque',
    ]
    observed = LatinCorpusStoplist().build_stoplist(
        self.test_corpus,
        size=10,
        basis='frequency',
        inc_values=False,
        sort_words=False,
    )
    self.assertEqual(observed, expected)
def test_corpus_latin(self):
    """Check the ten-word stoplist built from the Latin test corpus with basis='zou'."""
    observed = LatinCorpusStoplist().build_stoplist(
        self.latin_test_corpus,
        size=10,
        basis='zou',
        inc_values=False,
    )
    expected = [
        'ac', 'atque', 'cum', 'et', 'in',
        'mihi', 'neque', 'qui', 'rerum', 'vel',
    ]
    self.assertEqual(observed, expected)
def test_corpus_stop_list_freq_inc_values(self):
    """Check the basis='frequency' stoplist when inc_values=True.

    With values included, the expected result is a list of
    (word, value) tuples rather than bare words.
    """
    expected = [
        ('ac', 8),
        ('ad', 5),
        ('atque', 6),
        ('cum', 8),
        ('et', 15),
        ('in', 18),
        ('mihi', 6),
        ('neque', 5),
        ('qui', 7),
        ('vel', 9),
    ]
    builder = LatinCorpusStoplist()
    observed = builder.build_stoplist(
        self.test_corpus,
        size=10,
        basis='frequency',
        inc_values=True,
    )
    self.assertEqual(observed, expected)
def test_corpus_stop_list_variance(self):
    """Check the ten-word stoplist built from the test corpus with basis='variance'."""
    builder = LatinCorpusStoplist()
    # inc_values is intentionally left unspecified here, as in the other
    # arguments not passed: the call relies on build_stoplist's defaults.
    observed = builder.build_stoplist(self.test_corpus, size=10, basis='variance')
    expected = [
        'ac', 'atque', 'cum', 'et', 'in',
        'mihi', 'neque', 'qui', 'rerum', 'vel',
    ]
    self.assertEqual(observed, expected)
def test_corpus_stop_list_freq_include(self):
    """Check the basis='frequency' stoplist when include=['est'] is passed."""
    # The expected list has eleven entries although size=10 is requested:
    # apparently the ten selected words plus the explicitly included 'est'.
    expected = [
        'ac', 'ad', 'atque', 'cum', 'est', 'et',
        'in', 'mihi', 'neque', 'qui', 'vel',
    ]
    observed = LatinCorpusStoplist().build_stoplist(
        self.test_corpus,
        size=10,
        basis='frequency',
        include=['est'],
    )
    self.assertEqual(observed, expected)