def oblique_relative_clause(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert an oblique relative clause,
    e.g. "An announcement, in which he stated that ...".

    The searched configuration is::

        head    --acl:relcl-->  clause
        head    --ref-->        pronoun   (FEATS PronType=Rel)
        clause  --obl-->        pronoun
        pronoun --case-->       case marker

    For every match the case marker becomes a predicate whose argument 1 is
    the clause and whose argument 2 is the pronoun; the pronoun is then
    linked to the head with a ref edge.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    pattern = DependencyGraph()

    head = DependencyGraphNode()
    case_marker = DependencyGraphNode()
    pronoun = DependencyGraphNode(FEATS={"PronType": "Rel"})
    clause = DependencyGraphNode()

    pattern.add_nodes([head, case_marker, pronoun, clause])

    pattern.add_dependency(head, clause, r'acl:relcl\w*')
    pattern.add_dependency(head, pronoun, r'ref')
    pattern.add_dependency(clause, pronoun, r'obl')
    pattern.add_dependency(pronoun, case_marker, r'case')

    for match in dep_graph.match(pattern):
        dep_head = match[head]
        dep_case = match[case_marker]
        dep_pronoun = match[pronoun]
        dep_clause = match[clause]

        # words are registered in the order head, case marker, pronoun, clause
        oia_head = oia_graph.add_words(dep_head.position)
        oia_case = oia_graph.add_words(dep_case.position)
        oia_pronoun = oia_graph.add_words(dep_pronoun.position)
        oia_clause = oia_graph.add_words(dep_clause.position)

        oia_graph.add_argument(oia_case, oia_clause, 1)
        oia_graph.add_argument(oia_case, oia_pronoun, 2)
        oia_graph.add_ref(oia_pronoun, oia_head)
def goeswith(dep_graph: DependencyGraph):
    """
    Merge every node with its ``goeswith`` children into one node.

    All groups are collected first and merged afterwards, so the graph is
    not modified while its nodes are being enumerated.

    The merged node takes the UPOS of the last group member (in LOC order)
    whose UPOS is not "X", or "X" when there is none, and the LOC of the
    last member.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    groups = []

    for head in dep_graph.nodes():
        members = [
            child
            for child, _ in dep_graph.children(head, filter=lambda n, l: "goeswith" in l)
        ]
        if members:
            members.append(head)
            members.sort(key=lambda item: item.LOC)
            groups.append(members)

    for members in groups:
        informative_tags = [member.UPOS for member in members if member.UPOS != "X"]
        merged_upos = informative_tags[-1] if informative_tags else "X"

        merged = merge_dep_nodes(members, UPOS=merged_upos, LOC=members[-1].LOC)
        dep_graph.replace_nodes(members, merged)
def aclwhose(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert relative clauses introduced by a possessive pronoun,
    e.g. "the person whose/who's cat is cute".

    Pattern (node names follow the example)::

        person --acl:relcl-->          cute
        cute   --nsubj | obj | iobj--> cat
        cat    --nmod:poss-->          whose   (FEATS PronType=Int)

    For every match: ``cute`` becomes a function of ``person``, ``whose``
    becomes a function of ``cat``, and ``whose`` is linked to ``person``
    with a ref edge.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    pattern = DependencyGraph()

    a = DependencyGraphNode()  # person
    b = DependencyGraphNode(FEATS={"PronType": "Int"})  # whose
    c = DependencyGraphNode()  # cat
    d = DependencyGraphNode()  # cute

    pattern.add_nodes([a, b, c, d])

    pattern.add_dependency(a, d, r'.*acl:relcl.*')
    # The alternatives must be grouped: without the group the expression is
    # parsed as ``.*nsubj`` | ``obj`` | ``iobj.*``, which treats the three
    # relations differently from each other and from the sibling patterns.
    pattern.add_dependency(d, c, r'.*(?:nsubj|obj|iobj).*')
    pattern.add_dependency(c, b, r'.*nmod:poss.*')
    # pattern.add_dependency(b, a, r'.*ref.*')

    for match in dep_graph.match(pattern):
        dep_a, dep_b, dep_c, dep_d = [match[x] for x in [a, b, c, d]]

        a_node, b_node, c_node, d_node = [
            oia_graph.add_words(x.position)
            for x in [dep_a, dep_b, dep_c, dep_d]
        ]

        oia_graph.add_function(d_node, a_node)
        oia_graph.add_function(b_node, c_node)
        oia_graph.add_ref(b_node, a_node)
def number_per_unit(dep_graph: DependencyGraph):
    """
    Merge ``NUM SYM NOUN`` sequences of directly adjacent nodes into one node.

    A SYM node qualifies when the node located right before it is a NUM and
    the node located right after it is a NOUN. The merged node is tagged NUM
    and takes the LOC of the trailing NOUN.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    triples = []

    for symbol in dep_graph.nodes(filter=lambda n: n.UPOS == "SYM"):
        before = dep_graph.get_node_by_loc(symbol.LOC - 1)
        after = dep_graph.get_node_by_loc(symbol.LOC + 1)

        if before and after and before.UPOS == "NUM" and after.UPOS == "NOUN":
            triples.append((before, symbol, after))

    # merge only after the scan, so the scan sees an unmodified graph
    for triple in triples:
        merged = merge_dep_nodes(triple, UPOS="NUM", LOC=triple[-1].LOC)
        dep_graph.replace_nodes(triple, merged)
def and_or_conjunction(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                       context: UD2OIAContext):
    """
    Convert coordination, e.g. "I like apples, bananas and oranges."

    Every node that has children attached through a relation starting with
    ``arg_con`` is treated as a conjunction root. Each such child becomes an
    argument of the root in the OIA graph; the argument index is the integer
    value of the first entry of ``rels.values()``.

    :param dep_graph: dependency graph to read
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    for node in dep_graph.nodes():
        components = list(
            dep_graph.children(node, filter=lambda n, l: l.startswith("arg_con")))

        if components:
            oia_root = oia_graph.add_words(node.position)

            for child, rels in components:
                oia_child = oia_graph.add_words(child.position)
                oia_graph.add_argument(oia_root, oia_child, int(rels.values()[0]))
def simplify(self, dep_graph: DependencyGraph, context: UD2OIAContext, options: WorkFlowOptions):
    """
    Run every simplifier in ``self.simplifiers`` over ``dep_graph``, in order.

    A context hook is installed on ``dep_graph`` that sets
    ``self.simplified = True`` and returns ``self.working_stack()``.
    ``self.simplified`` is reset before each simplifier runs, so after the
    call it tells whether the hook fired during that simplifier.
    NOTE(review): presumably the graph invokes the hook whenever it is
    modified - confirm against ``DependencyGraph.set_context_hook``.

    When ``options.debug`` is set, the initial graph and the graph after each
    simplifier that triggered the hook are recorded on ``context.debugger``.

    @param dep_graph: dependency graph, simplified in place
    @param context: conversion context providing the debugger
    @param options: workflow options; only ``debug`` is read here
    @return: None
    @raise Exception: any exception raised by a simplifier is logged with its
        traceback and re-raised unchanged
    """

    def mark_simplified():
        self.simplified = True
        return self.working_stack()

    dep_graph.set_context_hook(mark_simplified)

    if options.debug:
        context.debugger.record("simplify", "init", dep_graph)

    for simplifier in self.simplifiers:
        logger.debug("simplify by: {0}".format(simplifier.__name__))

        try:
            self.simplified = False
            simplifier(dep_graph)

            if options.debug and self.simplified:
                context.debugger.record("simplify", simplifier.__name__, dep_graph)

        except Exception:
            # The message needs a "{}" placeholder; without it the simplifier
            # name passed as a format argument never appears in the log.
            logger.opt(exception=True).error(
                "Error when running Simplifier: {}", simplifier.__name__)
            raise
def conjunction(dep_graph: DependencyGraph):
    """
    Process coordination, e.g. "I like apples, bananas and oranges."

    A conjunction root is a node that has at least one child attached via a
    relation starting with ``conj`` and no parent attached via such a
    relation. All roots are collected first; each is then handed to
    ``process_conjunction``, and ``process_head_conj`` is run once at the end.

    TODO: currently cannot process nested conjunction; should process from
    bottom to up.

    :param dep_graph: dependency graph, modified in place by the helpers
    :return: None
    """

    def has_conj_edge(edges):
        return any(rels.startswith("conj") for _, rels in edges)

    conj_roots = [
        node
        for node in dep_graph.nodes()
        if not has_conj_edge(dep_graph.parents(node))
        and has_conj_edge(dep_graph.children(node))
    ]

    for conj_root in conj_roots:
        logger.debug("found the root of conjunction")
        logger.debug(str(conj_root))

        process_conjunction(dep_graph, conj_root)

    process_head_conj(dep_graph)
def parallel_list(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                  context: UD2OIAContext):
    """
    Convert ``list`` relations into a LIST predicate.

    For every node that has children attached through a relation containing
    "list", the node and those children are ordered by LOC and become
    arguments 1..n of a new auxiliary "LIST" node in the OIA graph.

    :param dep_graph: dependency graph to read
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    groups = []

    for head in dep_graph.nodes():
        members = [
            child
            for child, _ in dep_graph.children(head, filter=lambda n, l: "list" in l)
        ]
        if members:
            members.append(head)
            members.sort(key=lambda item: item.LOC)
            groups.append(members)

    for members in groups:
        list_pred = oia_graph.add_aux("LIST")

        for arg_index, member in enumerate(members, start=1):
            oia_member = oia_graph.add_words(member.position)
            oia_graph.add_argument(list_pred, oia_member, arg_index)
def acl_mod_verb(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Attach a verb that modifies a nominal through ``acl`` as a modifier.

    This is called after adnominal_clause_mark, which means there is no mark.

    Matches that are already processed in ``context`` or already related in
    the OIA graph are skipped. For the rest, the verb is added as a modifier
    of the nominal.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context used to skip processed edges
    :return: None
    :raises AssertionError: if the dependency carries more than one
        ``acl:<subtype>`` label
    """
    pattern = DependencyGraph()

    # ADJ is for the cases that "many/some" are abbrv of many X/some X, representing NOUN
    # ADV is for the case of "here" for "i am here thinking xxx"
    noun_node = pattern.create_node(UPOS="NOUN|PRON|PROPN|ADJ|ADV|NUM")
    # aux is for can, have which ommits the true verb
    verb_node = pattern.create_node(UPOS="VERB|AUX")

    pattern.add_nodes([noun_node, verb_node])
    pattern.add_dependency(noun_node, verb_node, r'acl')

    for match in dep_graph.match(pattern):
        dep_noun = match[noun_node]
        dep_verb = match[verb_node]

        already_handled = (
            context.is_processed(dep_noun, dep_verb)
            or oia_graph.has_relation(dep_noun, dep_verb, direct_link=False)
        )
        if already_handled:
            continue

        oia_verb = oia_graph.add_words(dep_verb.position)
        oia_noun = oia_graph.add_words(dep_noun.position)

        dependency = dep_graph.get_dependency(dep_noun, dep_verb)
        subtyped = [x for x in dependency.rels if x.startswith("acl:")]

        # The sub-type ("acl:<subtype>", with "relcl" ignored) is computed but
        # currently not used: the verb is always attached as a plain modifier.
        pred = None
        if subtyped:
            assert len(subtyped) == 1
            pred = subtyped[0].split(":")[1]
            if pred == "relcl":
                pred = None

        oia_graph.add_mod(oia_verb, oia_noun)
def part(dep_graph: DependencyGraph):
    """
    Merge a PART node into the AUX/VERB node it modifies through ``advmod``.

    The merged node keeps the UPOS, LOC and FEATS of the AUX/VERB node; its
    members are passed to the merge in LOC order.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    parent_node = pattern.create_node(UPOS="AUX|VERB")
    part_node = pattern.create_node(UPOS="PART")

    pattern.add_dependency(parent_node, part_node, r'advmod')

    # the matches are materialised first because the loop rewrites the graph
    for match in list(dep_graph.match(pattern)):
        head = match[parent_node]
        particle = match[part_node]

        ordered = sorted([head, particle], key=lambda n: n.LOC)

        merged = merge_dep_nodes(ordered,
                                 UPOS=head.UPOS,
                                 LOC=head.LOC,
                                 FEATS=head.FEATS)

        dep_graph.replace_nodes(ordered, merged)
def complete_missing_case_mark(dep_graph: DependencyGraph, root, root_parents,
                               parallel_components, relation_to_conj,
                               case_marks):
    """
    Give conjuncts that lack a required case/mark word a copy of it.

    ``parallel_components`` is sorted in place by LOC. For every parent in
    ``root_parents`` the third item of ``relation_to_conj[parent.ID]`` lists
    the mark required for each component (``None`` meaning no requirement).
    A component whose children do not already carry the mark (as the whole
    LEMMA or as one of its space-separated words) receives a duplicate of the
    mark node that ``find_mark`` locates among the preceding components. The
    duplicate is flagged with ``aux = True``.

    :param dep_graph: dependency graph, modified in place
    :param root: not read by this function
    :param root_parents: parents of the conjunction root
    :param parallel_components: conjunct nodes; sorted in place by LOC
    :param relation_to_conj: maps a parent ID to a 3-tuple whose last item is
        the sequence of required marks
    :param case_marks: passed through to ``find_mark``
    :return: None
    """
    parallel_components.sort(key=lambda x: x.LOC)

    for parent in root_parents:
        _prefix, _shared_prefix, required_mark = relation_to_conj[parent.ID]

        if not required_mark:
            continue

        for index, (node, mark) in enumerate(zip(parallel_components, required_mark)):
            if mark is None:
                continue

            already_present = False
            for child, _ in dep_graph.children(node):
                if mark == child.LEMMA or mark in child.LEMMA.split(" "):
                    already_present = True
                    break
            if already_present:
                continue

            # only components located before the current one are searched
            found_mark = find_mark(case_marks, parallel_components[:index], mark)

            if not found_mark:
                logger.warning("cannot find the mark, just add the relation")
                continue

            mark_node, rel = found_mark
            duplicate = dep_graph.create_node(FORM=mark_node.FORM,
                                              LEMMA=mark_node.LEMMA,
                                              UPOS=mark_node.UPOS,
                                              LOC=mark_node.LOC)
            duplicate.aux = True
            dep_graph.add_dependency(node, duplicate, rel)
def it_verb_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Convert expletive "it" constructions, e.g. "it is xxx to do".

    Pattern: a VERB with an ``expl`` child whose lemma is "it" and an
    ``nsubj``/``csubj`` child. Both "it" and the subject are attached to the
    verb with the same argument index: 1 when "it" precedes the subject,
    otherwise 2. "it" is then linked to the subject by a ref edge, and both
    dependency edges are marked as processed in ``context``.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context; read and updated
    :return: None
    """
    pattern = DependencyGraph()

    it_node = pattern.create_node(LEMMA="it")
    verb_node = pattern.create_node(UPOS="VERB")
    subj_node = pattern.create_node(UPOS="NOUN|PRON|PROPN|VERB")

    pattern.add_dependency(verb_node, it_node, r'expl')
    pattern.add_dependency(verb_node, subj_node, r'nsubj|csubj')

    for match in dep_graph.match(pattern):
        dep_verb = match[verb_node]
        dep_it = match[it_node]
        dep_subj = match[subj_node]

        if context.is_processed(dep_verb, dep_it):
            continue

        oia_it = oia_graph.add_words(dep_it.position)
        oia_subj = oia_graph.add_words(dep_subj.position)
        oia_verb = oia_graph.add_words(dep_verb.position)

        # "it VERB subj that ..." -> 1 ; "subj VERB it that ..." -> 2
        shared_index = 1 if dep_it.LOC < dep_subj.LOC else 2

        oia_graph.add_argument(oia_verb, oia_it, shared_index)
        oia_graph.add_argument(oia_verb, oia_subj, shared_index)
        oia_graph.add_ref(oia_it, oia_subj)

        context.processed(dep_verb, dep_it)
        context.processed(dep_verb, dep_subj)
def acl_loop(dep_graph: DependencyGraph):
    """
    Break two-node cycles created by relative clauses.

    For every dependency ``n1 -> n2`` labelled ``acl:relcl``, the reverse
    dependency ``n2 -> n1`` is removed when it carries one of
    obl / nsubj / obj / mark / advmod.

    The dependencies are snapshotted before the loop because the loop removes
    edges from the graph, and a pair without a reverse dependency is skipped.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    back_relations = ("obl", "nsubj", "obj", "mark", "advmod")

    # iterate over a copy: removing edges while walking the live edge
    # iterable is unsafe
    for n1, n2, deps in list(dep_graph.dependencies()):
        if "acl:relcl" not in deps:
            continue

        back_deps = dep_graph.get_dependency(n2, n1)
        if back_deps is None:
            # no reverse edge, hence no loop to break
            continue

        if any(x in back_deps for x in back_relations):
            dep_graph.remove_dependency(n2, n1)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Convert nominal modifiers that carry a case marker, e.g.
    "the office of the chair" or "Istanbul in Turkey".

    Pattern: ``parent --nmod--> child --case--> case word``. Unless the parent
    and child are already related in the OIA graph, the case word becomes a
    predicate with the parent as argument 1 (added with ``mod=True``) and the
    child as argument 2.

    The case word is used as the predicate regardless of whether the nmod
    relation label names it by lemma or by form. The previous implementation
    compared the two but took the same action in both branches, so the
    comparison has been removed.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    pattern = DependencyGraph()

    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    pattern.add_nodes([parent_node, child_node, case_node])

    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):
        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        pred_node = oia_graph.add_words(dep_case_node.position)
        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def ud2oia(lang, uri, ud_data, enhanced=True, debug=False):
    """
    Convert UD data into an OIA graph.

    Sets the module-wide ``standard.language`` to ``lang``, builds a
    dependency graph from ``ud_data`` and runs a ``UD2OIAConverter`` on it.
    If the conversion raises, the input rows are logged (tab-joined) before
    the exception is re-raised.

    @param lang: language code stored in ``standard.language``
    @param uri: identifier stored on the workflow options
    @param ud_data: CoNLL data; iterated as rows of strings when logging a failure
    @param enhanced: passed to ``DependencyGraph.from_conll`` and stored on the graph
    @param debug: enables debug mode on the workflow options
    @return: tuple ``(oia_graph, context)`` produced by the converter
    @rtype: tuple
    """
    standard.language = lang

    converter = UD2OIAConverter()

    workflow_options = WorkFlowOptions()
    workflow_options.uri = uri
    workflow_options.debug = debug

    graph = DependencyGraph.from_conll(ud_data, need_root=True, enhanced=enhanced)
    graph.enhanced = enhanced

    try:
        oia_graph, context = converter.convert(graph, workflow_options)
    except Exception as e:
        logger.error("Exception in processing ud:")
        for row in ud_data:
            logger.error("\t".join(row))
        raise e
    else:
        return oia_graph, context
def find_new_nodes(old_node, dep_graph: DependencyGraph):
    """
    Return the first node of ``dep_graph`` whose ID contains ``old_node.ID``.

    Containment is tested with the ``in`` operator on the two IDs.
    NOTE(review): presumably a merged node's ID embeds the IDs of the nodes
    it replaced - confirm against ``merge_dep_nodes``.

    :param old_node: node whose ID is looked for
    :param dep_graph: dependency graph whose nodes are scanned in order
    :return: the first matching node, or None when there is none
    """
    candidates = (
        candidate
        for candidate in dep_graph.nodes()
        if old_node.ID in candidate.ID
    )
    return next(candidates, None)
def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                         context: UD2OIAContext):
    """
    Convert oblique dependents (``obl``, ``obl:tmod``, ``obl:npmod``) that
    are not yet related to their head in the OIA graph.

    When the dependency's values contain "tmod", an auxiliary "TIME_IN"
    predicate is created with the head as argument 1 (added with
    ``mod=True``) and the oblique as argument 2. Otherwise the oblique is
    added as a modifier of the head.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    # cut X by a knife
    pattern = DependencyGraph()

    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    oblique_node = DependencyGraphNode()

    pattern.add_node(verb_node)
    pattern.add_node(oblique_node)
    pattern.add_dependency(verb_node, oblique_node, r'obl:tmod|obl:npmod|obl')

    for match in dep_graph.match(pattern):
        dep_head = match[verb_node]
        dep_oblique = match[oblique_node]

        if oia_graph.has_relation(dep_head, dep_oblique, direct_link=False):
            continue

        edge_types = dep_graph.get_dependency(dep_head, dep_oblique).values()

        if "tmod" not in edge_types:
            # "npmod" and all remaining cases: plain modifier
            oia_head = oia_graph.add_words(dep_head.position)
            oia_oblique = oia_graph.add_words(dep_oblique.position)
            oia_graph.add_mod(oia_oblique, oia_head)
            continue

        time_pred = oia_graph.add_aux("TIME_IN")
        oia_head = oia_graph.add_words(dep_head.position)
        oia_oblique = oia_graph.add_words(dep_oblique.position)

        oia_graph.add_argument(time_pred, oia_head, 1, mod=True)
        oia_graph.add_argument(time_pred, oia_oblique, 2)
def noun_all(dep_graph: DependencyGraph):
    """
    Merge a nominal with a directly following "all" determiner.

    A pair qualifies when the head's UPOS is one of NOUN, PROPN, PRON, X, NUM
    or SYM and it has a ``det`` child with lemma "all" located at
    ``head.LOC + 1``. The merged node keeps the head's UPOS and LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    nominal_tags = {"NOUN", "PROPN", "PRON", "X", "NUM", "SYM"}

    pairs = [
        (head, child)
        for head in dep_graph.nodes(filter=lambda x: x.UPOS in nominal_tags)
        for child, rels in dep_graph.children(head)
        if "det" in rels and child.LEMMA == "all" and child.LOC == head.LOC + 1
    ]

    for head, all_node in pairs:
        merged = merge_dep_nodes([head, all_node], UPOS=head.UPOS, LOC=head.LOC)
        dep_graph.replace_nodes([head, all_node], merged)
def obl_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Attach every remaining ``obl`` dependent as a modifier of its head.

    Pairs that are already related in the OIA graph are skipped.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    pattern = DependencyGraph()

    modified_node = DependencyGraphNode()
    modifier_node = DependencyGraphNode()

    pattern.add_nodes([modified_node, modifier_node])
    pattern.add_dependency(modified_node, modifier_node, r'\bobl')

    for match in dep_graph.match(pattern):
        dep_head = match[modified_node]
        dep_dependent = match[modifier_node]

        if not oia_graph.has_relation(dep_head, dep_dependent, direct_link=False):
            oia_head = oia_graph.add_words(dep_head.position)
            oia_dependent = oia_graph.add_words(dep_dependent.position)

            oia_graph.add_mod(oia_dependent, oia_head)
def adjv_phrase(dep_graph: DependencyGraph):
    """
    Merge an ADJ/ADV head with the ADJ/ADV elements that directly precede it.

    A node is a phrase head unless one of its parents is attached through a
    relation intersecting ``valid_adj_form``. Scanning the parents stops at
    the first ``advmod`` parent that is neither ADJ nor ADV, which makes the
    node a head.

    Starting at the head and walking leftwards, the elements returned by
    ``valid_adjv_element`` are chained as long as each one is ADJ/ADV and sits
    exactly one position before the previous link. Chains of two or more
    nodes are merged into one node that keeps the head's UPOS and LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    adjv_tags = {"ADJ", "ADV"}
    collected = []

    for node in dep_graph.nodes(filter=lambda n: n.UPOS in adjv_tags):

        is_head = True
        for parent, rel in dep_graph.parents(node):
            if "advmod" in rel and parent.UPOS not in adjv_tags:
                is_head = True
                break
            if rel.intersect(valid_adj_form):
                is_head = False

        if not is_head:
            continue

        elements = sorted(valid_adjv_element(node, dep_graph), key=lambda x: x.LOC)

        chain = [node]
        leftmost_loc = node.LOC

        # right-to-left, so each candidate must sit just left of the chain
        for candidate in reversed(elements):
            if candidate.UPOS in adjv_tags and candidate.LOC == leftmost_loc - 1:
                chain.append(candidate)
                leftmost_loc = candidate.LOC

        chain.sort(key=lambda x: x.LOC)

        if len(chain) > 1:
            collected.append((chain, node))

    for members, head in collected:
        merged = merge_dep_nodes(members, UPOS=head.UPOS, LOC=head.LOC)
        dep_graph.replace_nodes(members, merged)
def adj_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Attach every ``amod`` dependent as a modifier of its head,
    e.g. "a pretty little boy".

    Adjectives before the noun are handled by the noun-phrase step; this
    converter processes the case where the adjective is behind the noun.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    pattern = DependencyGraph()

    noun_node = pattern.create_node()  # UPOS="NOUN|PRON|PROPN")
    adj_node = pattern.create_node()  # UPOS="ADJ|NOUN")
    pattern.add_dependency(noun_node, adj_node, r'amod')

    for match in dep_graph.match(pattern):
        dep_noun, dep_adj = match[noun_node], match[adj_node]

        oia_noun = oia_graph.add_words(dep_noun.position)
        oia_adj = oia_graph.add_words(dep_adj.position)

        logger.debug("adj_modifier: ")
        for item in (dep_noun.position, oia_noun, dep_adj.position, oia_adj):
            logger.debug(item)

        oia_graph.add_mod(oia_adj, oia_noun)
def adverbial_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Convert adverbial clauses (``advcl``).

    * with a mark, e.g. "run in order to catch it": the mark becomes a
      predicate with the verb as argument 1 (added with ``mod=True``) and the
      clause as argument 2. Nothing is attached when the mark cannot be added
      to the OIA graph or when its lemma is listed in
      ``CONJUNCTION_WORDS[language]``.
    * without a mark, e.g. "he worked hard, replacing his feud": the clause
      is added as a modifier of the verb.

    Matches already processed in ``context`` or already related in the OIA
    graph are skipped.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context used to skip processed edges
    :return: None
    """
    pattern = DependencyGraph()

    verb_node = pattern.create_node()
    modifier_node = pattern.create_node()
    pattern.add_dependency(verb_node, modifier_node, "advcl")

    for match in list(dep_graph.match(pattern)):
        dep_verb = match[verb_node]
        dep_clause = match[modifier_node]

        if context.is_processed(dep_verb, dep_clause):
            continue

        oia_verb = oia_graph.add_words(dep_verb.position)
        oia_clause = oia_graph.add_words(dep_clause.position)

        logger.debug("adverbial clause: verb={0}, modifier={1}".format(
            dep_verb.position, dep_clause.position))

        if oia_graph.has_relation(oia_verb, oia_clause):
            continue

        marks = list(
            dep_graph.children(dep_clause, filter=lambda n, rel: "mark" in rel))

        if not marks:
            oia_graph.add_mod(oia_clause, oia_verb)
            continue

        # only the first mark is considered
        mark_node, _ = marks[0]
        mark_pred = oia_graph.add_words(mark_node.position)

        if mark_pred is None or mark_node.LEMMA in CONJUNCTION_WORDS[language]:
            continue

        oia_graph.add_argument(mark_pred, oia_verb, 1, mod=True)
        oia_graph.add_argument(mark_pred, oia_clause, 2)
def to_verb(dep_graph: DependencyGraph):
    """
    Merge an infinitive marker "to" with the verb that directly follows it.

    A VERB is skipped when any relation to one of its parents has "to" among
    its values. Otherwise each ``mark`` child with lemma "to" located at
    ``verb.LOC - 1`` is merged with the verb, unless that child is a
    conjunction super node. The merged node keeps the verb's UPOS and LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pairs = []

    for verb in dep_graph.nodes(filter=lambda x: x.UPOS in {"VERB"}):

        if any("to" in rels.values() for _, rels in dep_graph.parents(verb)):
            continue

        for child, rels in dep_graph.children(verb):
            is_adjacent_to = (
                "mark" in rels
                and child.LEMMA == "to"
                and child.LOC == verb.LOC - 1
            )
            if not is_adjacent_to:
                continue
            if isinstance(child, DependencyGraphSuperNode) and child.is_conj:
                continue

            pairs.append((child, verb))

    for to_node, verb in pairs:
        merged = merge_dep_nodes([to_node, verb], UPOS=verb.UPOS, LOC=verb.LOC)
        dep_graph.replace_nodes([to_node, verb], merged)
def adv_adj_modifier(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Attach an ``advmod`` dependent (ADV, X or NOUN) of an ADJ as its modifier.

    Pairs that are already related in the OIA graph are skipped.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    adj_node = DependencyGraphNode(UPOS="ADJ")
    adv_node = DependencyGraphNode(UPOS="ADV|X|NOUN")

    pattern.add_nodes([adj_node, adv_node])
    pattern.add_dependency(adj_node, adv_node, r'advmod')

    for match in dep_graph.match(pattern):
        dep_adj, dep_adv = match[adj_node], match[adv_node]

        if not oia_graph.has_relation(dep_adj, dep_adv):
            oia_adj = oia_graph.add_words(dep_adj.position)
            oia_adv = oia_graph.add_words(dep_adv.position)

            oia_graph.add_mod(oia_adv, oia_adj)
def appositive_phrase(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert apposition, e.g. "Trump, president of US, came".

    For every ``appos`` dependency whose two ends both yield a truthy OIA
    node, an auxiliary "APPOS" predicate is created with the head as
    argument 1 and the appositive as argument 2.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    pattern = DependencyGraph()

    subj_node = DependencyGraphNode()
    appos_node = DependencyGraphNode()

    pattern.add_nodes([subj_node, appos_node])
    pattern.add_dependency(subj_node, appos_node, r'\w*appos\w*')

    for match in dep_graph.match(pattern):
        dep_head = match[subj_node]
        dep_appositive = match[appos_node]

        # the appositive is registered before the head
        oia_appositive = oia_graph.add_words(dep_appositive.position)
        oia_head = oia_graph.add_words(dep_head.position)

        if not (oia_appositive and oia_head):
            continue

        appos_pred = oia_graph.add_aux(label="APPOS")

        oia_graph.add_argument(appos_pred, oia_head, 1)
        oia_graph.add_argument(appos_pred, oia_appositive, 2)
def get_adj_verb_phrase(dep_graph):
    """
    Merge "get" + adjective into a single verb node.

    Pattern: an ADJ with an ``aux`` child that is a VERB with lemma "get".
    Adjectives that are conjunction super nodes are skipped. The merged node
    is tagged VERB and keeps the adjective's LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    adj_node = pattern.create_node(UPOS="ADJ")
    get_node = pattern.create_node(LEMMA="get", UPOS="VERB")

    pattern.add_dependency(adj_node, get_node, r'aux')

    to_merge = []

    for match in dep_graph.match(pattern):
        dep_adj = match[adj_node]
        dep_get = match[get_node]

        is_conj_super_node = isinstance(dep_adj, DependencyGraphSuperNode) and dep_adj.is_conj
        if not is_conj_super_node:
            to_merge.append(([dep_get, dep_adj], dep_adj))

    for members, head in to_merge:
        merged = merge_dep_nodes(members, UPOS="VERB", LOC=head.LOC)
        dep_graph.replace_nodes(members, merged)
def there_be_verb_phrase(dep_graph):
    """
    Merge expletive "there" with its "be" head into a single verb node.

    Pattern: a node whose FORM is "there"/"There" attached through ``expl``
    to a head whose space-separated LEMMA contains "be". The merged node is
    tagged VERB and keeps the head's LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    there_node = pattern.create_node(FORM=r'there|There')
    be_node = pattern.create_node()

    pattern.add_dependency(be_node, there_node, r'\w*expl\w*')

    to_merge = []

    for match in dep_graph.match(pattern):
        dep_there = match[there_node]
        dep_be = match[be_node]

        if "be" in dep_be.LEMMA.split(" "):
            to_merge.append(([dep_there, dep_be], dep_be))

    for members, head in to_merge:
        merged = merge_dep_nodes(members, UPOS="VERB", LOC=head.LOC)
        dep_graph.replace_nodes(members, merged)
def fallback_sconj(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Fallback for subordinating conjunctions that are still missing from the
    OIA graph.

    Every SCONJ node whose lemma is in the list below, whose words are not
    yet in the OIA graph, and which hangs on a parent through ``mark`` gets
    that parent as argument 1, unless the edge is already processed in
    ``context``.

    The previous implementation branched on the lemma being "because"/"if"
    but performed the same call in both branches; the branch is collapsed.

    :param dep_graph: dependency graph to read
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context used to skip processed edges
    :return: None
    :raises AssertionError: if the conjunction has more than one ``mark`` parent
    """
    # hoisted so the set is not rebuilt for every node
    fallback_lemmas = {
        "because", "so", "if", "then", "otherwise", "after", "before", "and",
        "or", "but"
    }

    for node in dep_graph.nodes():

        if oia_graph.has_word(node.position):
            continue

        if node.UPOS != "SCONJ" or node.LEMMA not in fallback_lemmas:
            continue

        parents = [n for n, l in dep_graph.parents(node) if "mark" in l]

        if not parents:
            continue

        assert len(parents) == 1
        parent = parents[0]

        logger.debug("context = " + str(context.processed_edges))

        if context.is_processed(parent, node):
            continue

        oiar_node = oia_graph.add_words(parent.position)
        oia_sconj_node = oia_graph.add_words(node.position)

        oia_graph.add_argument(oia_sconj_node, oiar_node, 1)
def negation(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Convert negation.

    For every dependency from any node to a node with lemma "not", an
    auxiliary "SCOPE" predicate is created and both "not" and its parent are
    attached to it as arguments.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context (not read by this converter)
    :return: None
    """
    pattern = DependencyGraph()

    not_node = DependencyGraphNode(LEMMA="not")
    parent_node = DependencyGraphNode()

    pattern.add_nodes([not_node, parent_node])
    pattern.add_dependency(parent_node, not_node, r'\w*')

    for match in dep_graph.match(pattern):
        dep_not = match[not_node]
        dep_parent = match[parent_node]

        scope_pred = oia_graph.add_aux(label="SCOPE")
        oia_not = oia_graph.add_words(dep_not.position)
        oia_parent = oia_graph.add_words(dep_parent.position)

        # NOTE(review): both arguments use index 1 - confirm this is intended
        # and that the parent should not be argument 2.
        for oia_argument in (oia_not, oia_parent):
            oia_graph.add_argument(scope_pred, oia_argument, 1)
def ever_since(dep_graph: DependencyGraph):
    """
    Merge "ever" with a directly following "since" into one node.

    An "ever" node qualifies when a "since" node sits at ``ever.LOC + 1``.
    For each qualifying "ever", the ``advmod`` relation from every parent
    that has one is removed first; the pair is then merged into a node that
    keeps the UPOS and LOC of "since".

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    all_nodes = list(dep_graph.nodes())
    ever_nodes = [node for node in all_nodes if node.LEMMA == "ever"]
    since_nodes = [node for node in all_nodes if node.LEMMA == "since"]

    if not ever_nodes or not since_nodes:
        return

    # first "since" node seen at each location
    since_by_loc = {}
    for since_node in since_nodes:
        since_by_loc.setdefault(since_node.LOC, since_node)

    advmod_edges = []
    pairs = []

    for ever_node in ever_nodes:
        following = since_by_loc.get(ever_node.LOC + 1)
        if following is None:
            continue

        pairs.append((ever_node, following))

        advmod_edges.extend(
            (parent, ever_node, 'advmod')
            for parent, parent_rel in dep_graph.parents(ever_node)
            if 'advmod' in parent_rel
        )

    for source, target, relation in advmod_edges:
        dep_graph.remove_dependency(source, target, relation)

    for ever_node, since_node in pairs:
        merged = merge_dep_nodes([ever_node, since_node],
                                 UPOS=since_node.UPOS,
                                 LOC=since_node.LOC)

        dep_graph.replace_nodes([ever_node, since_node], merged)