Exemple #1
0
import collections
import os

import numpy as np
import pytest
import spacy
from spacy.tokens import Doc

from textacy import Corpus
from textacy import load_spacy_lang
from textacy.datasets.capitol_words import CapitolWords

# Single dataset handle created at import time with default arguments; it is
# read by the skip condition below and by the ``corpus`` fixture.
DATASET = CapitolWords()

# Module-level pytest mark: tests in this module are skipped when the dataset
# reports no file path (``DATASET.filepath is None``).
pytestmark = pytest.mark.skipif(
    DATASET.filepath is None,
    reason="CapitolWords dataset must be downloaded before running tests",
)


@pytest.fixture(scope="module")
def corpus():
    """Return a ``Corpus`` for language ``"en"`` built from dataset records.

    The records come from ``DATASET.records(limit=5)``. The fixture is
    module-scoped, so pytest builds it once per test module.
    """
    sample_records = DATASET.records(limit=5)
    return Corpus("en", data=sample_records)


class TestCorpusInit:
    """Checks on the first positional argument of the ``Corpus`` constructor."""

    def test_corpus_init_lang(self):
        """Accept a language name or a loaded language; reject bytes and None."""
        # Valid inputs: the string "en" and whatever load_spacy_lang("en") returns.
        from_name = Corpus("en")
        assert isinstance(from_name, Corpus)
        from_loaded = Corpus(load_spacy_lang("en"))
        assert isinstance(from_loaded, Corpus)

        # Invalid inputs must each be rejected with a TypeError.
        invalid_langs = (b"en", None)
        for invalid in invalid_langs:
            with pytest.raises(TypeError):
                Corpus(invalid)
 def test_ioerror(self):
     """Iterating texts of a dataset rooted at ``self.tempdir`` raises IOError."""
     missing = CapitolWords(data_dir=self.tempdir)
     with self.assertRaises(IOError):
         # list() forces full iteration in case texts() is lazy.
         list(missing.texts())
 def test_download(self):
     """After ``download()``, the path in ``dataset.filename`` must exist."""
     fetched = CapitolWords(data_dir=self.tempdir)
     fetched.download()
     target_path = fetched.filename
     self.assertTrue(os.path.exists(target_path))
def test_ioerror(tmpdir):
    """Iterating texts of a dataset rooted at ``tmpdir`` must raise IOError."""
    data_dir = str(tmpdir)
    missing = CapitolWords(data_dir=data_dir)
    with pytest.raises(IOError):
        # list() forces full iteration in case texts() is lazy.
        list(missing.texts())
def test_download(tmpdir):
    """Download the dataset into ``tmpdir`` and check the file path exists.

    Args:
        tmpdir: pytest's temporary-directory fixture; converted to ``str``
            before being passed to ``CapitolWords`` as ``data_dir``.
    """
    # The directory must come from the ``tmpdir`` argument; no other
    # temporary-directory name is defined in this scope.
    dataset = CapitolWords(data_dir=str(tmpdir))
    dataset.download()
    assert os.path.exists(dataset.filename)
Exemple #6
0
def test_download(tmpdir):
    """After ``download()``, ``dataset._filepath`` must point to a regular file."""
    data_dir = str(tmpdir)
    fetched = CapitolWords(data_dir=data_dir)
    fetched.download()
    downloaded_path = fetched._filepath
    assert os.path.isfile(downloaded_path)