예제 #1
0
 def test_filtered_corpus_reader_words(self):
     """Check that words() gives content free of the filtered word 'Library'."""
     vocabulary = distinct_words(self.reader.words())
     filtered_word_leaked = 'Library' in vocabulary
     if filtered_word_leaked:
         self.fail('Filtered word present!')
     # The distinct-word collection must be truthy for the test to pass.
     self.assertTrue(vocabulary)
예제 #2
0
 def test_filtered_corpus_reader_words(self):
     """Exercise the reader's words() method against the word filter.

     Fails if 'Library' appears among the distinct words, or if the
     distinct-word collection is falsy.
     """
     seen = distinct_words(self.reader.words())
     if 'Library' in seen:
         self.fail('Filtered word present!')
     self.assertTrue(seen)
예제 #3
0
 def test_filtered_corpus_reader_paras(self):
     """Check that paras() gives content free of the filtered word 'Library'."""
     # Flatten one level: every item of every paragraph goes into one list.
     flattened = []
     for paragraph in self.reader.paras():
         flattened.extend(paragraph)
     vocabulary = distinct_words(flattened)
     if 'Library' in vocabulary:
         self.fail('Filtered word present!')
     self.assertTrue(vocabulary)
예제 #4
0
 def test_filtered_corpus_reader_sents(self):
     """Check that sents() gives content free of the filtered word 'Library'."""
     vocabulary = distinct_words(self.reader.sents())
     # NOTE(review): only 'Library' is checked here; the original author's
     # note asks why an earlier version of this test checked two words.
     filtered_word_leaked = 'Library' in vocabulary
     if filtered_word_leaked:
         self.fail('Filtered word present!')
     # Per the original author's note, a truthy vocabulary implies that
     # sents() itself produced content, so no separate check is made.
     self.assertTrue(vocabulary)
예제 #5
0
 def test_filtered_corpus_reader_sents(self):
     """Exercise the reader's sents() method against the word filter.

     Fails if 'Library' appears among the distinct words, or if the
     distinct-word collection is falsy.
     """
     seen = distinct_words(self.reader.sents())
     # NOTE(review): the original author wondered why an earlier version
     # of this test looked for two different words; only one is used here.
     if 'Library' in seen:
         self.fail('Filtered word present!')
     # Asserting on the distinct words stands in for asserting on the
     # sentences (original author's reasoning: the former implies the latter).
     self.assertTrue(seen)
예제 #6
0
 def test_filtered_corpus_reader_paras(self):
     """Exercise the reader's paras() method against the word filter.

     Fails if 'Library' appears among the distinct words, or if the
     distinct-word collection is falsy.
     """
     all_sents = []
     for paragraph in self.reader.paras():
         # Each paragraph contributes its items in order.
         all_sents.extend(paragraph)
     seen = distinct_words(all_sents)
     if 'Library' in seen:
         self.fail('Filtered word present!')
     self.assertTrue(seen)
예제 #7
0
 def test_filtered_corpus_reader_docs(self):
     """Check that docs() on the Latin Library reader omits filtered words.

     The reader is restricted to 'catullus.txt'. The test fails if either
     'Latin' or 'Library' is among the distinct words, or if no document
     is returned.
     """
     corpus = get_corpus_reader(language='latin', corpus_name='latin_text_latin_library')
     corpus._fileids = ['catullus.txt']
     documents = list(corpus.docs())
     vocabulary = distinct_words(documents)
     # Checked in the same order as before: 'Latin' first, then 'Library'.
     for banned in ('Latin', 'Library'):
         if banned in vocabulary:
             self.fail('Filtered word present!')
     has_documents = len(documents) > 0
     self.assertTrue(has_documents)
예제 #8
0
 def test_filtered_corpus_reader_paras(self):
     """Check that paras() on the Latin Library reader omits filtered words.

     The reader is restricted to 'catullus.txt'. The test fails if either
     'Latin' or 'Library' is among the distinct words, or if no paragraph
     is returned.
     """
     corpus = get_corpus_reader(
         language='latin',
         corpus_name='latin_text_latin_library',
     )
     corpus._fileids = ['catullus.txt']
     paragraphs = list(corpus.paras())
     # Flatten one level: every item of every paragraph goes into one list.
     flattened = []
     for paragraph in paragraphs:
         flattened.extend(paragraph)
     vocabulary = distinct_words(flattened)
     # Checked in the same order as before: 'Latin' first, then 'Library'.
     for banned in ('Latin', 'Library'):
         if banned in vocabulary:
             self.fail('Filtered word present!')
     has_paragraphs = len(paragraphs) > 0
     self.assertTrue(has_paragraphs)
예제 #9
0
 def test_filtered_corpus_reader_docs(self):
     """Test filtered corpus docs method.

     Checks that the filtered word 'Library' is absent from the distinct
     words of all documents, that at least one document is returned, and
     that each file in ``problem_files`` yields a first document whose
     length is greater than 100.
     """
     docs = list(self.reader.docs())
     uniq_words = distinct_words(docs)
     if 'Library' in uniq_words:
         self.fail('Filtered word present!')
     self.assertTrue(docs, 'docs() returned no documents')
     problem_files = [
         'caesar/bc3.txt', 'hymni.txt', 'varro.frag.txt', 'varro.ll10.txt',
         'varro.ll5.txt', 'varro.ll6.txt', 'varro.ll7.txt', 'varro.ll8.txt',
         'varro.ll9.txt',
     ]
     for filename in problem_files:
         doc = list(self.reader.docs([filename]))
         # unittest assertions instead of bare ``assert``: they are not
         # stripped under ``python -O`` and the message names the file.
         self.assertTrue(doc, 'No document read from {}'.format(filename))
         self.assertGreater(
             len(doc[0]), 100,
             'Document unexpectedly short: {}'.format(filename))
예제 #10
0
 def test_filtered_corpus_reader_docs(self):
     """Test filtered corpus docs method.

     Checks that the filtered word 'Library' is absent from the distinct
     words of all documents, that at least one document is returned, and
     that each file in ``problem_files`` yields a first document whose
     length is greater than 100.
     """
     docs = list(self.reader.docs())
     uniq_words = distinct_words(docs)
     if 'Library' in uniq_words:
         self.fail('Filtered word present!')
     self.assertTrue(docs, 'docs() returned no documents')
     problem_files = [
         'caesar/bc3.txt', 'hymni.txt', 'varro.frag.txt', 'varro.ll10.txt',
         'varro.ll5.txt', 'varro.ll6.txt', 'varro.ll7.txt', 'varro.ll8.txt',
         'varro.ll9.txt',
     ]
     for filename in problem_files:
         doc = list(self.reader.docs([filename]))
         # Bare ``assert`` is removed when Python runs with -O, which would
         # silently skip these checks; unittest assertions always execute.
         self.assertTrue(doc, 'No document read from {}'.format(filename))
         self.assertGreater(
             len(doc[0]), 100,
             'Document unexpectedly short: {}'.format(filename))
예제 #11
0
 def test_filtered_corpus_reader_docs(self):
     """Test filtered corpus docs method.

     Builds a Latin Library reader restricted to 'catullus.txt' and checks
     that neither 'Latin' nor 'Library' is among the distinct words of its
     documents and that at least one document is returned. Then checks
     that each file in ``problem_files`` yields a first document whose
     length is greater than 100.
     """
     reader = get_corpus_reader(language='latin',
                                corpus_name='latin_text_latin_library')
     reader._fileids = ['catullus.txt']
     docs = list(reader.docs())
     words = distinct_words(docs)
     if 'Latin' in words:
         self.fail('Filtered word present!')
     if 'Library' in words:
         self.fail('Filtered word present!')
     self.assertTrue(docs, 'docs() returned no documents')
     problem_files = [
         'caesar/bc3.txt', 'hymni.txt', 'varro.frag.txt', 'varro.ll10.txt',
         'varro.ll5.txt', 'varro.ll6.txt', 'varro.ll7.txt', 'varro.ll8.txt',
         'varro.ll9.txt',
     ]
     for filename in problem_files:
         # File ids are passed explicitly here rather than relying on the
         # ``_fileids`` override set above.
         doc = list(reader.docs([filename]))
         # unittest assertions instead of bare ``assert``: they are not
         # stripped under ``python -O`` and the message names the file.
         self.assertTrue(doc, 'No document read from {}'.format(filename))
         self.assertGreater(
             len(doc[0]), 100,
             'Document unexpectedly short: {}'.format(filename))