コード例 #1
0
 def test_callable_args(self, spacy_doc):
     """Each extractor kwarg may be a callable; all yielded terms are Spans."""
     extractors = {
         "ngs": lambda doc: extract.ngrams(doc, n=2),
         "ents": extract.entities,
         "ncs": extract.noun_chunks,
     }
     terms = list(extract.terms(spacy_doc, **extractors))
     assert terms
     for term in terms:
         assert isinstance(term, Span)
コード例 #2
0
 def test_dedupe(self, dedupe, spacy_doc):
     """dedupe=True removes duplicate (start, end) spans; dedupe=False keeps them."""
     terms = list(
         extract.terms(spacy_doc, ngs=2, ents=True, ncs=True, dedupe=dedupe)
     )
     assert terms
     # Unique span boundaries observed across all extracted terms.
     unique_bounds = {(term.start, term.end) for term in terms}
     if dedupe is True:
         assert len(terms) == len(unique_bounds)
     else:
         assert len(terms) > len(unique_bounds)
コード例 #3
0
ファイル: test_sparse_vec.py プロジェクト: dbragdon1/textacy
def tokenized_docs():
    """Return per-document lists of lowercased unigram tokens for a small corpus."""
    texts = [
        "Mary had a little lamb. Its fleece was white as snow.",
        "Everywhere that Mary went the lamb was sure to go.",
        "It followed her to school one day, which was against the rule.",
        "It made the children laugh and play to see a lamb at school.",
        "And so the teacher turned it out, but still it lingered near.",
        "It waited patiently about until Mary did appear.",
        "Why does the lamb love Mary so? The eager children cry.",
        "Mary loves the lamb, you know, the teacher did reply.",
    ]
    nlp = textacy.load_spacy_lang("en_core_web_sm")
    # ngs=1 extracts unigrams; lowercase each token's surface text.
    return [
        [token.text.lower() for token in extract.terms(doc, ngs=1)]
        for doc in nlp.pipe(texts)
    ]
コード例 #4
0
 def test_simple_args(self, spacy_doc):
     """Shorthand int/bool extractor args produce a non-empty sequence of Spans."""
     terms = list(extract.terms(spacy_doc, ngs=2, ents=True, ncs=True))
     assert terms
     for term in terms:
         assert isinstance(term, Span)
コード例 #5
0
 def test_default(self, spacy_doc):
     """Calling terms() without specifying any extractor raises ValueError."""
     with pytest.raises(ValueError):
         list(extract.terms(spacy_doc))