Ejemplo n.º 1
0
class WikiReaderTestCase(unittest.TestCase):
    """Exercise ``WikiReader`` against a small on-disk fixture.

    Each test gets a fresh temporary directory, created next to this test
    module, holding ``wikitext.xml.bz2``. The file is written from
    ``WIKITEXT`` through ``bzip_open``.
    """

    def setUp(self):
        """Write the fixture file and open a ``WikiReader`` on it."""
        self.tempdir = tempfile.mkdtemp(
            prefix='test_corpora', dir=os.path.dirname(os.path.abspath(__file__)))
        # unittest skips tearDown() when setUp() raises but still runs
        # registered cleanups, so register the removal before any of the
        # fallible steps below; otherwise a failure leaks the directory.
        self.addCleanup(self._remove_tempdir)
        wiki_fname = os.path.join(self.tempdir, 'wikitext.xml.bz2')
        with bzip_open(wiki_fname, mode='w') as f:
            f.write(WIKITEXT)
        self.wikireader = WikiReader(wiki_fname)

    def test_texts(self):
        """Every item produced by ``texts()`` is a ``str``."""
        texts = list(self.wikireader.texts())
        for text in texts:
            self.assertIsInstance(text, str)

    def test_texts_min_len(self):
        """``texts(min_len=300)`` produces exactly one item for the fixture."""
        texts = list(self.wikireader.texts(min_len=300))
        self.assertEqual(len(texts), 1)

    def test_texts_limit(self):
        """``texts(limit=1)`` produces exactly one item."""
        texts = list(self.wikireader.texts(limit=1))
        self.assertEqual(len(texts), 1)

    def test_pages(self):
        """Every item produced by ``pages()`` is a ``dict``."""
        pages = list(self.wikireader.pages())
        for page in pages:
            self.assertIsInstance(page, dict)

    def test_pages_min_len(self):
        """``pages(min_len=300)`` produces exactly one item for the fixture."""
        pages = list(self.wikireader.pages(min_len=300))
        self.assertEqual(len(pages), 1)

    def test_pages_limit(self):
        """``pages(limit=1)`` produces exactly one item."""
        pages = list(self.wikireader.pages(limit=1))
        self.assertEqual(len(pages), 1)

    def tearDown(self):
        """Remove the temporary directory and everything inside it."""
        self._remove_tempdir()

    def _remove_tempdir(self):
        """Delete ``self.tempdir`` recursively; do nothing if it is gone.

        Called from both ``tearDown`` and the cleanup registered in
        ``setUp``, so it has to tolerate the directory already being removed.
        """
        # Local import: this helper is the only user of shutil in the class.
        import shutil

        if os.path.isdir(self.tempdir):
            # rmtree also handles nested directories, which the previous
            # listdir/remove/rmdir sequence could not.
            shutil.rmtree(self.tempdir)
Ejemplo n.º 2
0
class WikiReaderTestCase(unittest.TestCase):
    """Exercise ``WikiReader`` against a small on-disk fixture.

    Each test gets a fresh temporary directory, created next to this test
    module, holding ``wikitext.xml.bz2``. The file is written from
    ``WIKITEXT`` via ``write_file``.
    """

    def setUp(self):
        """Write the fixture file and open a ``WikiReader`` on it."""
        self.tempdir = tempfile.mkdtemp(
            prefix='test_corpora', dir=os.path.dirname(os.path.abspath(__file__)))
        # unittest skips tearDown() when setUp() raises but still runs
        # registered cleanups. This fallback keeps the directory from leaking
        # if writing the fixture or building the reader fails. On the normal
        # path tearDown() has already removed it, so errors are ignored here.
        self.addCleanup(shutil.rmtree, self.tempdir, ignore_errors=True)
        wiki_fname = os.path.join(self.tempdir, 'wikitext.xml.bz2')
        write_file(WIKITEXT, wiki_fname, mode='wb', auto_make_dirs=True)
        self.wikireader = WikiReader(wiki_fname)

    def test_texts(self):
        """Every item produced by ``texts()`` is a ``unicode_type`` instance."""
        texts = list(self.wikireader.texts())
        for text in texts:
            self.assertIsInstance(text, unicode_type)

    def test_texts_min_len(self):
        """``texts(min_len=300)`` produces exactly one item for the fixture."""
        texts = list(self.wikireader.texts(min_len=300))
        self.assertEqual(len(texts), 1)

    def test_texts_limit(self):
        """``texts(limit=1)`` produces exactly one item."""
        texts = list(self.wikireader.texts(limit=1))
        self.assertEqual(len(texts), 1)

    def test_pages(self):
        """Every item produced by ``pages()`` is a ``dict``."""
        pages = list(self.wikireader.pages())
        for page in pages:
            self.assertIsInstance(page, dict)

    def test_pages_min_len(self):
        """``pages(min_len=300)`` produces exactly one item for the fixture."""
        pages = list(self.wikireader.pages(min_len=300))
        self.assertEqual(len(pages), 1)

    def test_pages_limit(self):
        """``pages(limit=1)`` produces exactly one item."""
        pages = list(self.wikireader.pages(limit=1))
        self.assertEqual(len(pages), 1)

    def tearDown(self):
        """Remove the temporary directory and everything inside it."""
        # Strict removal on the normal path, so a real deletion failure still
        # surfaces as a test error.
        shutil.rmtree(self.tempdir)