コード例 #1
0
 def test_patdict_bool_int(self, spacy_doc):
     """Check dict patterns whose attribute values are a bool and an int.

     Only the first five matches of each pattern are inspected.
     """
     # Boolean attribute value: the first token of every hit must be a digit.
     digit_pattern = [{"IS_DIGIT": True}]
     digit_hits = list(extract.matches(spacy_doc, digit_pattern))[:5]
     assert digit_hits
     for hit in digit_hits:
         assert hit[0].is_digit is True

     # Integer attribute value: the first token of every hit has length 5.
     length_pattern = [{"LENGTH": 5}]
     length_hits = list(extract.matches(spacy_doc, length_pattern))[:5]
     assert length_hits
     for hit in length_hits:
         assert len(hit[0]) == 5
コード例 #2
0
 def test_patstr_bool_int(self, spacy_doc):
     """Check string patterns that carry typed ``bool(...)``/``int(...)`` values.

     Only the first five matches of each pattern are inspected.
     """
     # "IS_DIGIT:bool(True)" -- the first token of every hit must be a digit.
     digit_hits = list(extract.matches(spacy_doc, "IS_DIGIT:bool(True)"))
     digit_hits = digit_hits[:5]
     assert digit_hits
     for hit in digit_hits:
         assert hit[0].is_digit is True

     # "LENGTH:int(5)" -- the first token of every hit has length 5.
     length_hits = list(extract.matches(spacy_doc, "LENGTH:int(5)"))
     length_hits = length_hits[:5]
     assert length_hits
     for hit in length_hits:
         assert len(hit[0]) == 5
コード例 #3
0
 def test_patdict_op(self, spacy_doc):
     """Check a dict pattern that combines a POS constraint with the "+" OP.

     Only the first five matches are inspected.
     """
     noun_run = [{"POS": "NOUN", "OP": "+"}]
     hits = list(extract.matches(spacy_doc, noun_run))[:5]
     assert hits
     for hit in hits:
         # Each match spans at least one token ...
         assert len(hit) >= 1
     for hit in hits:
         # ... and every token inside it is tagged as a noun.
         for tok in hit:
             assert tok.pos_ == "NOUN"
コード例 #4
0
 def test_pattern_types(self, spacy_doc):
     """Check that each supported pattern container shape yields ``Span`` matches.

     Four shapes are exercised: a single pattern string, a list of pattern
     strings, a list of token dicts, and a list of lists of token dicts.
     Only the first five matches per shape are inspected.
     """
     single_str = "POS:NOUN"
     many_strs = ["POS:NOUN", "POS:DET"]
     single_dicts = [{"POS": "NOUN"}]
     many_dicts = [[{"POS": "NOUN"}], [{"POS": "DET"}]]

     for pattern_spec in (single_str, many_strs, single_dicts, many_dicts):
         hits = list(extract.matches(spacy_doc, pattern_spec))[:5]
         assert hits
         for hit in hits:
             assert isinstance(hit, Span)
コード例 #5
0
 def test_patdict(self, spacy_doc):
     """Check a single-token dict pattern constrained to POS == "NOUN".

     Only the first five matches are inspected.
     """
     noun_pattern = [{"POS": "NOUN"}]
     hits = list(extract.matches(spacy_doc, noun_pattern))[:5]
     assert hits
     for hit in hits:
         # A one-dict pattern must produce one-token spans.
         assert len(hit) == 1
     for hit in hits:
         assert hit[0].pos_ == "NOUN"
コード例 #6
0
 def test_patstr_op(self, spacy_doc):
     """Check the string pattern "POS:NOUN:+" (POS constraint plus "+" OP).

     Only the first five matches are inspected.
     """
     hits = list(extract.matches(spacy_doc, "POS:NOUN:+"))
     hits = hits[:5]
     assert hits
     for hit in hits:
         # Each match spans at least one token ...
         assert len(hit) >= 1
     for hit in hits:
         # ... and every token inside it is tagged as a noun.
         for tok in hit:
             assert tok.pos_ == "NOUN"
コード例 #7
0
 def test_patstr(self, spacy_doc):
     """Check the single-token string pattern "POS:NOUN".

     Only the first five matches are inspected.
     """
     hits = list(extract.matches(spacy_doc, "POS:NOUN"))
     hits = hits[:5]
     assert hits
     for hit in hits:
         # A single-token pattern must produce one-token spans.
         assert len(hit) == 1
     for hit in hits:
         assert hit[0].pos_ == "NOUN"
コード例 #8
0
import test_spacy
from textacy.extract import matches
from scripting.knowledge_graph.HDSKG.helperfunctions import set_pos_exceptions, view_displacy
from scripting.knowledge_graph.HDSKG import patterns

# Exploratory script: run the HDSKG verb/noun patterns over a short Dutch text
# and print the tokens that act as nominal subjects.
# NOTE(review): `test_spacy` is presumably a wrapper/alias for spaCy -- confirm.
nlp = test_spacy.load('nl_core_news_sm')

text = """Iedereen die in Nederland woont of werkt en die is verzekerd voor zorg vanuit de Wlz betaalt hiervoor premie. 
Hieruit wordt de zorg betaald. Bijna altijd betaalt u daarnaast een eigen bijdrage."""

doc = nlp(text)

# Project helper applied to the parsed doc before any pattern matching.
doc = set_pos_exceptions(doc)

# Materialise each match iterable once so the spans can be reused.
verb_closed_results = matches(doc, patterns.verbs_closed)
verbs_closed = list(verb_closed_results)

verbs_open_results = matches(doc, patterns.verbs_open)
verbs_open = list(verbs_open_results)

verbs = [verbs_open, verbs_closed]

noun_results = matches(doc, patterns.nouns)
nouns = list(noun_results)

# dependency tree
words = []
for word in doc:
    # Compare for equality: `word.dep_ in 'nsubj'` was a substring test and
    # would also accept an empty label or fragments such as 'subj'.
    if word.dep_ == 'nsubj':
        words.append(word)
        # `Token.string` is a deprecated alias that spaCy v3 removed;
        # `text_with_ws` yields the same text (token plus trailing whitespace).
        print(word.text_with_ws)