def get_random_corpus():
    """Pick a random corpus file and build a '##'-prefixed title for it.

    A category is drawn at random from ``pc.get_categories()``, then a file
    name from ``pc.get_files(category)``.  The mapping returned by
    ``pc.get_file(category, name)`` is scanned once:

    * the ``'description'`` value becomes the title: if it contains
      "list" or "List", each occurrence is replaced with "Marriage";
      otherwise the description is prefixed with "The Marriage of ".
      Either way the result starts with ``'##'``;
    * every other value is converted with ``list()``; the last such value
      seen becomes the corpus.

    Side effect: the module-level global ``title`` is rebound whenever a
    description is found.  When the file has no ``'description'`` key the
    previously stored global ``title`` is returned unchanged (a NameError
    is raised if it was never set).

    Returns:
        tuple: ``(title, corpus)``.  ``corpus`` is an empty list when the
        file contains nothing besides its description.
    """
    global title

    category = choice(pc.get_categories())
    entries = pc.get_file(category, choice(pc.get_files(category)))

    # Start from an empty corpus so a description-only file does not hit an
    # unbound local at the return statement.
    corpus = []
    for key, value in entries.items():
        if key != 'description':
            corpus = list(value)
            continue
        if 'List' in value or 'list' in value:
            # Lower-case replacement first, then capitalised, as before.
            renamed = value.replace('list', 'Marriage').replace('List', 'Marriage')
            title = '##' + renamed
        else:
            title = '##The Marriage of ' + value
    return title, corpus
def test_get_files(self):
    # Checks pycorpora.get_files() for a category and for a nested
    # sub-directory path.
    import pycorpora

    check = self.assertEqual

    # Compared as sets, so the order of the returned names does not matter.
    check(
        set(pycorpora.get_files('pycorpora_test')),
        {'test-filename', 'test'},
    )

    # The sub-directory result is compared as an exact list.
    check(pycorpora.get_files('pycorpora_test/subdir'), ['another_test'])