# These test methods assume module-level imports of `extract` (from textacy)
# and `Span` (from spacy.tokens), plus a `spacy_doc` fixture, defined elsewhere
# in the test module.
def test_patdict_bool_int(self, spacy_doc):
    matches = list(extract.matches(spacy_doc, [{"IS_DIGIT": True}]))[:5]
    assert matches
    assert all(span[0].is_digit is True for span in matches)
    matches = list(extract.matches(spacy_doc, [{"LENGTH": 5}]))[:5]
    assert matches
    assert all(len(span[0]) == 5 for span in matches)

def test_patstr_bool_int(self, spacy_doc):
    matches = list(extract.matches(spacy_doc, "IS_DIGIT:bool(True)"))[:5]
    assert matches
    assert all(span[0].is_digit is True for span in matches)
    matches = list(extract.matches(spacy_doc, "LENGTH:int(5)"))[:5]
    assert matches
    assert all(len(span[0]) == 5 for span in matches)

def test_patdict_op(self, spacy_doc):
    matches = list(extract.matches(spacy_doc, [{"POS": "NOUN", "OP": "+"}]))[:5]
    assert matches
    assert all(len(span) >= 1 for span in matches)
    assert all(tok.pos_ == "NOUN" for span in matches for tok in span)

def test_pattern_types(self, spacy_doc):
    all_patterns = [
        "POS:NOUN",
        ["POS:NOUN", "POS:DET"],
        [{"POS": "NOUN"}],
        [[{"POS": "NOUN"}], [{"POS": "DET"}]],
    ]
    for patterns in all_patterns:
        matches = list(extract.matches(spacy_doc, patterns))[:5]
        assert matches
        assert all(isinstance(span, Span) for span in matches)

def test_patdict(self, spacy_doc):
    matches = list(extract.matches(spacy_doc, [{"POS": "NOUN"}]))[:5]
    assert matches
    assert all(len(span) == 1 for span in matches)
    assert all(span[0].pos_ == "NOUN" for span in matches)

def test_patstr_op(self, spacy_doc):
    matches = list(extract.matches(spacy_doc, "POS:NOUN:+"))[:5]
    assert matches
    assert all(len(span) >= 1 for span in matches)
    assert all(tok.pos_ == "NOUN" for span in matches for tok in span)

def test_patstr(self, spacy_doc):
    matches = list(extract.matches(spacy_doc, "POS:NOUN"))[:5]
    assert matches
    assert all(len(span) == 1 for span in matches)
    assert all(span[0].pos_ == "NOUN" for span in matches)

import spacy
from textacy.extract import matches

from scripting.knowledge_graph.HDSKG.helperfunctions import set_pos_exceptions, view_displacy
from scripting.knowledge_graph.HDSKG import patterns

nlp = spacy.load('nl_core_news_sm')
text = """Iedereen die in Nederland woont of werkt en die is verzekerd voor zorg vanuit de Wlz betaalt hiervoor premie. Hieruit wordt de zorg betaald. Bijna altijd betaalt u daarnaast een eigen bijdrage."""
# English gloss: "Everyone who lives or works in the Netherlands and who is insured
# for care under the Wlz pays a premium for this. The care is paid out of this.
# In almost all cases you also pay a personal contribution on top."

doc = nlp(text)
doc = set_pos_exceptions(doc)

# Collect verb and noun spans using the HDSKG match patterns.
verbs_closed = list(matches(doc, patterns.verbs_closed))
verbs_open = list(matches(doc, patterns.verbs_open))
verbs = [verbs_open, verbs_closed]

nouns = list(matches(doc, patterns.nouns))

# Dependency tree: collect and print the nominal subjects.
words = []
for word in doc:
    if word.dep_ == 'nsubj':
        words.append(word)
        print(word.text)
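
# Sketch (an addition, not part of the original script): spaCy's bundled displacy
# renderer gives a quick visual check of the dependency parse that the nsubj loop
# above walks over. The imported view_displacy helper presumably offers a similar
# view, but its interface is not shown here, so plain displacy is used; the output
# path 'parse.svg' is just an example.
from spacy import displacy

svg = displacy.render(doc, style='dep', jupyter=False)  # returns SVG markup of the parse tree
with open('parse.svg', 'w', encoding='utf-8') as f:
    f.write(svg)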