def find_verb_subject(tnode: SimpleTree) -> Optional[SimpleTree]:
    """Starting with a verb terminal node, attempt to find the verb's
    subject noun phrase.

    The lookup is done in two steps:

    1. The VP that encloses the terminal's own enclosing VP is asked
       for its ``NP_SUBJ`` attribute (the subject may be embedded within
       the verb phrase, as in 'Í dag langaði Páli bróður að fara í sund').
    2. If that yields nothing, the enclosing IP (inflected phrase) node,
       if any, is asked for its ``NP_SUBJ`` attribute.

    Returns the subject node, or None if no subject was found. A terminal
    without any enclosing VP no longer raises; the search simply proceeds
    to the IP lookup.
    """
    # Walk up two VP levels, guarding each hop: enclosing_tag() may
    # return None, in which case there is no outer VP to inspect
    inner_vp = tnode.enclosing_tag("VP")
    outer_vp = None if inner_vp is None else inner_vp.enclosing_tag("VP")

    subj = None
    if outer_vp is not None:
        # A missing NP_SUBJ surfaces as AttributeError; treat it as 'not found'
        subj = getattr(outer_vp, "NP_SUBJ", None)

    if subj is None:
        # Not found within the verb phrase: look within the
        # enclosing IP (inflected phrase) node, if any
        ip = tnode.enclosing_tag("IP")
        if ip is not None:
            subj = getattr(ip, "NP_SUBJ", None)

    return subj
def wrong_verb_use(
    self,
    match: SimpleTree,
    correct_verb: str,
    context: ContextType,
) -> None:
    """Annotate a wrong verb being used with a noun, for instance
    'byði hnekki', where the verb should be 'bíða' -> 'biði hnekki'
    instead of 'bjóða'.

    match is the matched subtree, correct_verb is the verb that should
    have been used, and context is passed on to the pattern lookups.
    A 'P002' annotation spanning both the verb phrase and the noun
    phrase is appended to self._ann; no replacement text is suggested.
    """
    text_template = "Hér á líklega að vera sögnin '{0}' í stað '{1}'."
    detail_template = (
        "Í samhenginu '{0}' er rétt að nota sögnina '{1}' í stað '{2}'."
    )

    # Locate the verb phrase and pick the lemma of its first child
    # that has the terminal category 'so'
    verb_phrase = match.first_match("VP > { %verb }", context)
    verb_node = next(
        child for child in verb_phrase.children if child.tcat == "so"
    )
    used_verb = verb_node.own_lemma_mm

    # Locate the noun phrase that the verb is used with
    noun_phrase = match.first_match("NP >> { %noun }", context)

    # The annotation covers the union of the two phrase spans
    vp_span = verb_phrase.span
    np_span = noun_phrase.span
    first_token = min(vp_span[0], np_span[0])
    last_token = max(vp_span[1], np_span[1])

    annotation = Annotation(
        start=first_token,
        end=last_token,
        code="P002",
        text=text_template.format(correct_verb, used_verb),
        detail=detail_template.format(
            match.tidy_text, correct_verb, used_verb
        ),
        suggest="",
    )
    self._ann.append(annotation)
def wrong_preposition_að(self, match: SimpleTree) -> None:
    """Handle a match of a suspect preposition pattern, where 'að'
    is used but 'af' is probably intended.

    A 'P001' annotation spanning both the verb phrase and the
    prepositional phrase is appended to self._ann. A corrected text is
    suggested only when the matched text contains exactly one ' að '.
    """
    text_template = "'{0} að' á sennilega að vera '{0} af'"
    detail_template = (
        "Sögnin '{0}' tekur yfirleitt með sér "
        "forsetninguna 'af', ekki 'að'."
    )

    # Find the offending verb phrase: try a direct child first,
    # then fall back to any descendant
    verb_phrase = match.first_match("VP > { %verb }", self.ctx_að)
    if verb_phrase is None:
        verb_phrase = match.first_match("VP >> { %verb }", self.ctx_að)

    # Find the attached prepositional phrase
    prep_phrase = match.first_match('P > { "að" }')

    # Token indices spanning both phrases
    vp_span = verb_phrase.span
    pp_span = prep_phrase.span
    first_token = min(vp_span[0], pp_span[0])
    last_token = max(vp_span[1], pp_span[1])

    verb_text = verb_phrase.tidy_text
    matched_text = match.tidy_text

    if matched_text.count(" að ") == 1:
        # Only one way to substitute að -> af: do it
        suggestion = matched_text.replace(" að ", " af ")
    else:
        # !!! TODO: More intelligent substitution to create a suggestion
        suggestion = ""

    annotation = Annotation(
        start=first_token,
        end=last_token,
        code="P001",
        text=text_template.format(verb_text),
        detail=detail_template.format(verb_text),
        suggest=suggestion,
    )
    self._ann.append(annotation)
def _simple_tree(self, node: Node) -> SimpleTree:
    """Build a SimpleTree covering the deep tree rooted at node.

    The token span of the node is obtained from self._node_span();
    the corresponding slice of self._tokens is handed to
    SimpleTree.from_deep_tree() together with the index of the
    first token in the span.
    """
    span_start, span_end = self._node_span(node)
    # The span's last index is inclusive, hence the + 1 in the slice
    span_tokens = self._tokens[span_start:span_end + 1]
    return SimpleTree.from_deep_tree(
        node,
        span_tokens,
        first_token_index=span_start,
    )
def simple_parse(text):
    """No-frills parse of text, returning a SimpleTree object.

    The settings are read from 'config/Greynir.conf' unless
    Settings.loaded is already set. The text is then parsed within
    a read-only session, and the result of TreeUtility.parse_text()
    is unpacked into the SimpleTree constructor.
    """
    settings_ready = Settings.loaded
    if not settings_ready:
        Settings.read("config/Greynir.conf")

    with SessionContext(read_only=True) as session:
        # Construct the tree while the session is still open
        parse_result = TreeUtility.parse_text(session, text)
        return SimpleTree(*parse_result)