예제 #1
0
 def test_download(self, tmpdir):
     """Download the Wikipedia dataset into ``tmpdir`` and check the file exists."""
     target_dir = str(tmpdir)
     wiki = wikimedia.Wikipedia(data_dir=target_dir)
     wiki.download()
     # After downloading, the dataset's reported filepath must be a regular file.
     assert os.path.isfile(wiki.filepath)
예제 #2
0
 def test_oserror(self, tmpdir):
     """Expect ``OSError`` when reading texts from a dataset that was never downloaded."""
     wiki = wikimedia.Wikipedia(data_dir=str(tmpdir))
     with pytest.raises(OSError):
         # Wrap in list() so the texts() iterable is actually consumed.
         list(wiki.texts())
예제 #3
0
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

import os
import re

import pytest

from textacy import compat
from textacy.datasets import wikimedia


# Module-level dataset handles for the English "current" dumps, created once
# at import time and shared by the tests in this module.
# NOTE(review): the skipif guard on TestWikipedia checks WIKIPEDIA.filepath,
# but its reason text mentions "Wikinews" -- confirm which dataset is meant.
WIKINEWS = wikimedia.Wikinews(lang="en", version="current")
WIKIPEDIA = wikimedia.Wikipedia(lang="en", version="current")


@pytest.mark.skipif(
    WIKIPEDIA.filepath is None,
    reason="Wikinews dataset must be downloaded before running tests",
)
class TestWikipedia(object):

    @pytest.mark.skip("No need to download a new dataset every time")
    def test_download(self, tmpdir):
        """Download the Wikipedia dataset into ``tmpdir`` and check the file exists.

        Skipped unconditionally by the decorator above.
        """
        target_dir = str(tmpdir)
        wiki = wikimedia.Wikipedia(data_dir=target_dir)
        wiki.download()
        # After downloading, the dataset's reported filepath must be a regular file.
        assert os.path.isfile(wiki.filepath)

    def test_oserror(self, tmpdir):
        dataset = wikimedia.Wikipedia(data_dir=str(tmpdir))
        with pytest.raises(OSError):