def spacy_doc():
    """Build a spaCy doc from one text of the CapitolWords dataset.

    Instantiates the dataset, takes the first text yielded by
    ``texts(min_len=1500, limit=1)``, and processes it with textacy.

    Returns:
        The object returned by ``textacy.make_spacy_doc`` for that text.

    Raises:
        StopIteration: If the dataset yields no text for the given filters.
    """
    ds = datasets.CapitolWords()
    text = next(ds.texts(min_len=1500, limit=1))
    # Name the pipeline package explicitly, as the other doc-building calls
    # in this module do; the bare "en" shortcut relies on spaCy shortcut
    # links and is not resolvable without one.
    return textacy.make_spacy_doc(text, lang="en_core_web_sm")
import pytest

import textacy
from textacy import datasets
from textacy.extract import keyterms as kt

DATASET = datasets.CapitolWords()

# Skip the tests in this module when the dataset reports no file path.
pytestmark = pytest.mark.skipif(
    DATASET.filepath is None,
    reason="CapitolWords dataset must be downloaded before running tests",
)


@pytest.fixture(scope="module")
def spacy_doc():
    """Return a spaCy doc built from one CapitolWords text.

    The text is the first one yielded by
    ``DATASET.texts(min_len=1500, limit=1)``; the doc is created once per
    module.
    """
    texts = DATASET.texts(min_len=1500, limit=1)
    return textacy.make_spacy_doc(next(texts), lang="en_core_web_sm")


@pytest.fixture(scope="module")
def empty_spacy_doc():
    """Return a spaCy doc built from the empty string, once per module."""
    return textacy.make_spacy_doc("", lang="en_core_web_sm")


def test_default(spacy_doc):
    """``sgrank`` with default arguments yields a non-empty list of (str, float) pairs."""
    keyterms = kt.sgrank(spacy_doc)
    assert isinstance(keyterms, list)
    assert len(keyterms) > 0
    # Check the shape of every entry before checking element types,
    # in the same order as the original combined assertions.
    for item in keyterms:
        assert isinstance(item, tuple) and len(item) == 2
    for item in keyterms:
        term, score = item
        assert isinstance(term, str) and isinstance(score, float)