Ejemplo n.º 1
0
def spacy_doc():
    """Return a spaCy doc built from a single CapitolWords text.

    The text is the first item yielded by ``texts(min_len=1500, limit=1)``
    on a freshly created ``CapitolWords`` dataset.

    Raises:
        StopIteration: If the dataset yields no text for that query.
    """
    capitol_words = datasets.CapitolWords()
    matching_texts = capitol_words.texts(min_len=1500, limit=1)
    first_text = next(matching_texts)
    doc = textacy.make_spacy_doc(first_text, lang="en")
    return doc
Ejemplo n.º 2
0
import pytest

import textacy
from textacy import datasets
from textacy.extract import keyterms as kt

# Dataset handle created once at import time; the ``spacy_doc`` fixture
# below reads its sample text from it.
DATASET = datasets.CapitolWords()

# Module-level pytest mark: skip the tests in this module when the dataset
# reports no file path, with the reason shown in the skip message.
pytestmark = pytest.mark.skipif(
    DATASET.filepath is None,
    reason="CapitolWords dataset must be downloaded before running tests",
)


@pytest.fixture(scope="module")
def spacy_doc():
    """Module-scoped fixture: a spaCy doc made from one CapitolWords text.

    Takes the first text yielded by ``DATASET.texts(min_len=1500, limit=1)``
    and processes it with the ``en_core_web_sm`` pipeline.
    """
    candidates = DATASET.texts(min_len=1500, limit=1)
    sample = next(candidates)
    return textacy.make_spacy_doc(sample, lang="en_core_web_sm")


@pytest.fixture(scope="module")
def empty_spacy_doc():
    """Module-scoped fixture: a spaCy doc made from the empty string."""
    no_text = ""
    doc = textacy.make_spacy_doc(no_text, lang="en_core_web_sm")
    return doc


def test_default(spacy_doc):
    """Check the shape of ``kt.sgrank`` output when called with defaults.

    The result must be a non-empty list of 2-tuples, each holding a string
    followed by a float.
    """
    result = kt.sgrank(spacy_doc)

    # Container checks first: a list with at least one entry.
    assert isinstance(result, list)
    assert len(result) > 0

    # Every entry must be a pair before the pair is unpacked below.
    for entry in result:
        assert isinstance(entry, tuple)
        assert len(entry) == 2

    for first, second in result:
        assert isinstance(first, str)
        assert isinstance(second, float)