def test_patdict_op(self, doc):
    """A dict pattern carrying a "+" operator matches non-empty, all-noun spans.

    Only the first five matches are inspected.
    """
    noun_run = [{"POS": "NOUN", "OP": "+"}]
    found = list(extract.token_matches(doc, noun_run))[:5]
    assert found
    # Every match must hold at least one token ...
    for hit in found:
        assert len(hit) >= 1
    # ... and every token in every match must be tagged as a noun.
    for hit in found:
        for tok in hit:
            assert tok.pos_ == "NOUN"
def test_patdict_int(self, doc):
    """A dict pattern with an integer value matches tokens of that length.

    Only the first five matches are inspected; the check is made on the
    first token of each match.
    """
    five_long = [{"LENGTH": 5}]
    found = list(extract.token_matches(doc, five_long))[:5]
    assert found
    first_token_lengths = [len(hit[0]) for hit in found]
    assert all(size == 5 for size in first_token_lengths)
def test_patdict_bool(self, doc):
    """A dict pattern with a boolean value matches punctuation tokens.

    Only the first five matches are inspected; the check is made on the
    first token of each match.
    """
    punct_only = [{"IS_PUNCT": True}]
    found = list(extract.token_matches(doc, punct_only))[:5]
    assert found
    for hit in found:
        assert hit[0].is_punct is True
def test_patdict(self, doc):
    """A single-token dict pattern matches one-token spans that are nouns.

    Only the first five matches are inspected.
    """
    single_noun = [{"POS": "NOUN"}]
    found = list(extract.token_matches(doc, single_noun))[:5]
    assert found
    # No operator in the pattern, so each match is exactly one token long.
    for hit in found:
        assert len(hit) == 1
    for hit in found:
        assert hit[0].pos_ == "NOUN"
def test_patstr_int(self, doc):
    """A string pattern using the ``int(...)`` form matches tokens of that length.

    Only the first five matches are inspected; the check is made on the
    first token of each match.
    """
    pattern = "LENGTH:int(5)"
    found = list(extract.token_matches(doc, pattern))[:5]
    assert found
    assert not any(len(hit[0]) != 5 for hit in found)
def test_patstr_bool(self, doc):
    """A string pattern using the ``bool(...)`` form matches punctuation tokens.

    Only the first five matches are inspected; the check is made on the
    first token of each match.
    """
    pattern = "IS_PUNCT:bool(True)"
    found = list(extract.token_matches(doc, pattern))[:5]
    assert found
    for hit in found:
        assert hit[0].is_punct is True
def test_patstr_op(self, doc):
    """A string pattern with a trailing "+" operator matches all-noun spans.

    Only the first five matches are inspected.
    """
    pattern = "POS:NOUN:+"
    found = list(extract.token_matches(doc, pattern))[:5]
    assert found
    for hit in found:
        assert len(hit) >= 1
    # Every token inside every matched span must be a noun.
    for hit in found:
        for tok in hit:
            assert tok.pos_ == "NOUN"
def test_patstr(self, doc):
    """A single-token string pattern matches one-token spans that are nouns.

    Only the first five matches are inspected.
    """
    pattern = "POS:NOUN"
    found = list(extract.token_matches(doc, pattern))[:5]
    assert found
    assert not any(len(hit) != 1 for hit in found)
    assert not any(hit[0].pos_ != "NOUN" for hit in found)
def test_pattern_types(self, doc, patterns):
    """Whatever form ``patterns`` takes, every match comes back as a ``Span``.

    Only the first five matches are inspected.
    """
    every_match = list(extract.token_matches(doc, patterns))
    found = every_match[:5]
    assert found
    for hit in found:
        assert isinstance(hit, Span)