class WikiReaderTestCase(unittest.TestCase):
    """Tests for ``WikiReader.texts()`` and ``WikiReader.pages()``.

    Each test runs against a reader opened on a freshly written copy of the
    module-level ``WIKITEXT`` fixture, stored in a per-test temporary
    directory created next to this test file.
    """

    def setUp(self):
        """Create the temp directory, write the fixture, and open the reader."""
        # Local import so this class does not depend on a module-level
        # ``import shutil`` being present.
        import shutil

        self.tempdir = tempfile.mkdtemp(
            prefix='test_corpora',
            dir=os.path.dirname(os.path.abspath(__file__)))
        # Register removal right away: unittest does not call tearDown() when
        # setUp() raises, but cleanups registered here still run, so a failed
        # fixture write cannot leave the directory behind. rmtree also copes
        # with nested directories, unlike a flat os.remove() loop.
        self.addCleanup(shutil.rmtree, self.tempdir)

        wiki_fname = os.path.join(self.tempdir, 'wikitext.xml.bz2')
        with bzip_open(wiki_fname, mode='w') as f:
            f.write(WIKITEXT)
        self.wikireader = WikiReader(wiki_fname)

    def test_texts(self):
        """Every item yielded by ``texts()`` is a ``str``."""
        texts = list(self.wikireader.texts())
        # Guard against the loop below passing vacuously on an empty result.
        self.assertTrue(texts)
        for text in texts:
            self.assertIsInstance(text, str)

    def test_texts_min_len(self):
        """``texts(min_len=300)`` yields exactly one text for the fixture."""
        texts = list(self.wikireader.texts(min_len=300))
        self.assertEqual(len(texts), 1)

    def test_texts_limit(self):
        """``texts(limit=1)`` yields exactly one text."""
        texts = list(self.wikireader.texts(limit=1))
        self.assertEqual(len(texts), 1)

    def test_pages(self):
        """Every item yielded by ``pages()`` is a ``dict``."""
        pages = list(self.wikireader.pages())
        # Guard against the loop below passing vacuously on an empty result.
        self.assertTrue(pages)
        for page in pages:
            self.assertIsInstance(page, dict)

    def test_pages_min_len(self):
        """``pages(min_len=300)`` yields exactly one page for the fixture."""
        pages = list(self.wikireader.pages(min_len=300))
        self.assertEqual(len(pages), 1)

    def test_pages_limit(self):
        """``pages(limit=1)`` yields exactly one page."""
        pages = list(self.wikireader.pages(limit=1))
        self.assertEqual(len(pages), 1)
class WikiReaderTestCase(unittest.TestCase):
    """Tests for ``WikiReader.texts()`` and ``WikiReader.pages()``.

    Each test runs against a reader opened on a freshly written copy of the
    module-level ``WIKITEXT`` fixture, stored in a per-test temporary
    directory created next to this test file.
    """

    def setUp(self):
        """Create the temp directory, write the fixture, and open the reader."""
        self.tempdir = tempfile.mkdtemp(
            prefix='test_corpora',
            dir=os.path.dirname(os.path.abspath(__file__)))
        # Register removal right away instead of relying on tearDown():
        # unittest skips tearDown() when setUp() raises, but cleanups
        # registered here still run, so a failed fixture write or reader
        # construction cannot leave the directory behind.
        self.addCleanup(shutil.rmtree, self.tempdir)

        wiki_fname = os.path.join(self.tempdir, 'wikitext.xml.bz2')
        write_file(WIKITEXT, wiki_fname, mode='wb', auto_make_dirs=True)
        self.wikireader = WikiReader(wiki_fname)

    def test_texts(self):
        """Every item yielded by ``texts()`` is a ``unicode_type`` instance."""
        texts = list(self.wikireader.texts())
        # Guard against the loop below passing vacuously on an empty result.
        self.assertTrue(texts)
        for text in texts:
            self.assertIsInstance(text, unicode_type)

    def test_texts_min_len(self):
        """``texts(min_len=300)`` yields exactly one text for the fixture."""
        texts = list(self.wikireader.texts(min_len=300))
        self.assertEqual(len(texts), 1)

    def test_texts_limit(self):
        """``texts(limit=1)`` yields exactly one text."""
        texts = list(self.wikireader.texts(limit=1))
        self.assertEqual(len(texts), 1)

    def test_pages(self):
        """Every item yielded by ``pages()`` is a ``dict``."""
        pages = list(self.wikireader.pages())
        # Guard against the loop below passing vacuously on an empty result.
        self.assertTrue(pages)
        for page in pages:
            self.assertIsInstance(page, dict)

    def test_pages_min_len(self):
        """``pages(min_len=300)`` yields exactly one page for the fixture."""
        pages = list(self.wikireader.pages(min_len=300))
        self.assertEqual(len(pages), 1)

    def test_pages_limit(self):
        """``pages(limit=1)`` yields exactly one page."""
        pages = list(self.wikireader.pages(limit=1))
        self.assertEqual(len(pages), 1)