def test_filtered_corpus_reader_words(self):
    """Check the words() output of the filtered reader.

    Fails if 'Library' appears among the distinct words, and requires the
    distinct-word collection to be non-empty.
    """
    vocabulary = distinct_words(self.reader.words())
    if 'Library' in vocabulary:
        self.fail('Filtered word present!')
    # A truthy vocabulary means words() actually yielded something.
    self.assertTrue(vocabulary)
def test_filtered_corpus_reader_words(self):
    """Verify that words() from the filtered reader omits 'Library'.

    The distinct words must also be non-empty, otherwise the absence
    check would pass trivially.
    """
    word_stream = self.reader.words()
    seen = distinct_words(word_stream)
    library_leaked = 'Library' in seen
    if library_leaked:
        self.fail('Filtered word present!')
    self.assertTrue(seen)
def test_filtered_corpus_reader_paras(self):
    """Check the paras() output of the filtered reader.

    Paragraphs are flattened into a single list of sentences before the
    distinct words are collected; 'Library' must not be among them and
    the collection must be non-empty.
    """
    flattened = []
    for paragraph in self.reader.paras():
        # Each paragraph is iterated for its sentences.
        flattened.extend(paragraph)
    vocabulary = distinct_words(flattened)
    if 'Library' in vocabulary:
        self.fail('Filtered word present!')
    self.assertTrue(vocabulary)
def test_filtered_corpus_reader_sents(self):
    """Check the sents() output of the filtered reader.

    Fails if 'Library' appears among the distinct words, and requires the
    distinct-word collection to be non-empty.
    """
    vocabulary = distinct_words(self.reader.sents())
    # Open question carried over from the original author: an earlier
    # version of this test looked for two different words; only 'Library'
    # is checked here.
    if 'Library' in vocabulary:
        self.fail('Filtered word present!')
    # A non-empty vocabulary implies that sents() produced content, so no
    # separate check on the sentences themselves is needed.
    self.assertTrue(vocabulary)
def test_filtered_corpus_reader_sents(self):
    """Verify that sents() from the filtered reader omits 'Library'.

    The distinct words must also be non-empty, otherwise the absence
    check would pass trivially.
    """
    sentence_stream = self.reader.sents()
    seen = distinct_words(sentence_stream)
    # NOTE: the original author wondered why an earlier version of this
    # test checked for two different words; a single word is checked now.
    library_leaked = 'Library' in seen
    if library_leaked:
        self.fail('Filtered word present!')
    # Truthiness of the distinct words stands in for "sents had content".
    self.assertTrue(seen)
def test_filtered_corpus_reader_paras(self):
    """Verify that paras() from the filtered reader omits 'Library'.

    Every sentence of every paragraph is gathered into one list, the
    distinct words are taken from it, and that collection must be
    non-empty and free of 'Library'.
    """
    all_sentences = []
    for paragraph in self.reader.paras():
        for sentence in paragraph:
            all_sentences.append(sentence)
    seen = distinct_words(all_sentences)
    if 'Library' in seen:
        self.fail('Filtered word present!')
    self.assertTrue(seen)
def test_filtered_corpus_reader_docs(self):
    """Check the docs() output of a Latin Library corpus reader.

    The reader is restricted to 'catullus.txt'; neither 'Latin' nor
    'Library' may appear among the distinct words, and at least one
    document must be returned.
    """
    latin_reader = get_corpus_reader(language='latin',
                                     corpus_name='latin_text_latin_library')
    # Limit the reader to a single file.
    latin_reader._fileids = ['catullus.txt']
    documents = list(latin_reader.docs())
    vocabulary = distinct_words(documents)
    # Checked in the same order as before: 'Latin' first, then 'Library'.
    for banned in ('Latin', 'Library'):
        if banned in vocabulary:
            self.fail('Filtered word present!')
    self.assertTrue(len(documents) > 0)
def test_filtered_corpus_reader_paras(self):
    """Check the paras() output of a Latin Library corpus reader.

    The reader is restricted to 'catullus.txt'; paragraphs are flattened
    into sentences, neither 'Latin' nor 'Library' may appear among the
    distinct words, and at least one paragraph must be returned.
    """
    latin_reader = get_corpus_reader(language='latin',
                                     corpus_name='latin_text_latin_library')
    # Limit the reader to a single file.
    latin_reader._fileids = ['catullus.txt']
    paragraphs = list(latin_reader.paras())
    flattened = []
    for paragraph in paragraphs:
        flattened.extend(paragraph)
    vocabulary = distinct_words(flattened)
    # Checked in the same order as before: 'Latin' first, then 'Library'.
    for banned in ('Latin', 'Library'):
        if banned in vocabulary:
            self.fail('Filtered word present!')
    self.assertTrue(len(paragraphs) > 0)
def test_filtered_corpus_reader_docs(self):
    """Test filtered corpus docs method.

    Checks that 'Library' is absent from the distinct words of all
    documents, that at least one document is returned, and that each file
    listed in ``problem_files`` yields a document whose first element is
    longer than 100.
    """
    docs = list(self.reader.docs())
    uniq_words = distinct_words(docs)
    if 'Library' in uniq_words:
        self.fail('Filtered word present!')
    self.assertTrue(len(docs) > 0)
    problem_files = ['caesar/bc3.txt',
                     'hymni.txt',
                     'varro.frag.txt',
                     'varro.ll10.txt',
                     'varro.ll5.txt',
                     'varro.ll6.txt',
                     'varro.ll7.txt',
                     'varro.ll8.txt',
                     'varro.ll9.txt']
    for filename in problem_files:
        doc = list(self.reader.docs([filename]))
        # unittest assertions rather than bare ``assert``: they are not
        # stripped under ``python -O`` and they name the offending file.
        self.assertTrue(
            doc, 'No document returned for {}'.format(filename))
        self.assertGreater(
            len(doc[0]), 100,
            'Document for {} is too short'.format(filename))
def test_filtered_corpus_reader_docs(self):
    """Test filtered corpus docs method.

    Checks that 'Library' is absent from the distinct words of all
    documents, that at least one document is returned, and that each file
    listed in ``problem_files`` yields a document whose first element is
    longer than 100.
    """
    docs = list(self.reader.docs())
    uniq_words = distinct_words(docs)
    if 'Library' in uniq_words:
        self.fail('Filtered word present!')
    self.assertTrue(len(docs) > 0)
    problem_files = [
        'caesar/bc3.txt',
        'hymni.txt',
        'varro.frag.txt',
        'varro.ll10.txt',
        'varro.ll5.txt',
        'varro.ll6.txt',
        'varro.ll7.txt',
        'varro.ll8.txt',
        'varro.ll9.txt',
    ]
    for filename in problem_files:
        doc = list(self.reader.docs([filename]))
        # unittest assertions rather than bare ``assert``: they are not
        # stripped under ``python -O`` and they name the offending file.
        self.assertTrue(
            doc, 'No document returned for {}'.format(filename))
        self.assertGreater(
            len(doc[0]), 100,
            'Document for {} is too short'.format(filename))
def test_filtered_corpus_reader_docs(self):
    """Test filtered corpus docs method.

    Builds a Latin Library corpus reader restricted to 'catullus.txt' and
    checks that neither 'Latin' nor 'Library' is among the distinct words
    and that at least one document is returned. Then requests each file
    listed in ``problem_files`` from the same reader and checks that it
    yields a document whose first element is longer than 100.
    """
    reader = get_corpus_reader(language='latin',
                               corpus_name='latin_text_latin_library')
    # Limit the default docs() call to a single file.
    reader._fileids = ['catullus.txt']
    docs = list(reader.docs())
    words = distinct_words(docs)
    if 'Latin' in words:
        self.fail('Filtered word present!')
    if 'Library' in words:
        self.fail('Filtered word present!')
    self.assertTrue(len(docs) > 0)
    problem_files = [
        'caesar/bc3.txt',
        'hymni.txt',
        'varro.frag.txt',
        'varro.ll10.txt',
        'varro.ll5.txt',
        'varro.ll6.txt',
        'varro.ll7.txt',
        'varro.ll8.txt',
        'varro.ll9.txt',
    ]
    for filename in problem_files:
        doc = list(reader.docs([filename]))
        # unittest assertions rather than bare ``assert``: they are not
        # stripped under ``python -O`` and they name the offending file.
        self.assertTrue(
            doc, 'No document returned for {}'.format(filename))
        self.assertGreater(
            len(doc[0]), 100,
            'Document for {} is too short'.format(filename))