def obl_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Turn ``obl`` dependencies into OIA modifier relations.

    The adv before the verb should be processed by verb_phrase; this
    converter processes the adv after the verb.

    :param dep_graph: dependency graph searched for ``obl`` edges
    :param oia_graph: OIA graph that receives the new ``mod`` relations
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    head = DependencyGraphNode()
    dependent = DependencyGraphNode()
    pattern.add_nodes([head, dependent])
    pattern.add_dependency(head, dependent, r'\bobl')

    for found in dep_graph.match(pattern):
        dep_head, dep_dependent = found[head], found[dependent]

        # Skip pairs for which the OIA graph already reports a relation
        # (queried with direct_link=False).
        if not oia_graph.has_relation(dep_head, dep_dependent,
                                      direct_link=False):
            oia_head = oia_graph.add_words(dep_head.position)
            oia_dependent = oia_graph.add_words(dep_dependent.position)
            oia_graph.add_mod(oia_dependent, oia_head)
def ccomp_mark_sconj(dep_graph: DependencyGraph):
    """
    Fold an "as" marker of a clausal complement into the governing verb.

    Example: See them as they are

    Matches ``VERB -ccomp-> pred`` with ``pred -mark-> SCONJ``. When the
    marker's lemma is "as", the ``mark`` edge is removed and the verb and the
    marker are merged into one node built from
    ``[verb, "{1}", marker, "{2}"]``, keeping the verb's UPOS and LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()
    governor = pattern.create_node(UPOS="VERB")
    complement = pattern.create_node()
    marker = pattern.create_node(UPOS="SCONJ")
    pattern.add_dependency(governor, complement, r'ccomp')
    pattern.add_dependency(complement, marker, 'mark')

    # The matches are materialized first; the loop body edits dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_marker = found[marker]
        if dep_marker.LEMMA != "as":
            continue

        dep_governor = found[governor]
        dep_complement = found[complement]

        dep_graph.remove_dependency(dep_complement, dep_marker)

        phrase = [dep_governor, "{1}", dep_marker, "{2}"]
        merged_verb = merge_dep_nodes(phrase,
                                      UPOS=dep_governor.UPOS,
                                      LOC=dep_governor.LOC)
        dep_graph.replace_nodes(phrase, merged_verb)
def reverse_passive_verb(dep_graph: DependencyGraph):
    """
    Merge a copula with a past-tense verb that precedes its subject.

    Example: I'd forgotten how blown away I was by some of the songs the
    first time I saw it in NY.

    Matches a VERB with ``Tense=Past`` that has a ``*subj`` child and a
    ``cop`` child whose lemma is "be". Only when the surface order is
    verb < subject < be are the copula and the verb merged (in that order)
    into one VERB node placed at the copula's LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()
    subject = pattern.create_node()
    verb = pattern.create_node(UPOS="VERB", FEATS={"Tense": "Past"})
    copula = pattern.create_node(LEMMA=r"\bbe\b")
    pattern.add_dependency(verb, subject, r"\w*subj")
    pattern.add_dependency(verb, copula, "cop")

    # The matches are materialized first; the loop body edits dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_subject = found[subject]
        dep_verb = found[verb]
        dep_copula = found[copula]

        if dep_verb.LOC < dep_subject.LOC < dep_copula.LOC:
            phrase = [dep_copula, dep_verb]
            merged_verb = merge_dep_nodes(phrase,
                                          UPOS="VERB",
                                          LOC=dep_copula.LOC)
            dep_graph.replace_nodes(phrase, merged_verb)
def whose_noun(dep_graph: DependencyGraph):
    """
    Merge "whose" with the noun it introduces into a single noun node.

    Matches ``noun -nmod:poss-> owner`` together with ``owner -ref-> whose``
    (lemma "whose"). For every match the "whose" token and the noun are
    merged (keeping the noun's UPOS and LOC), and the ``ref`` edge from the
    owner and the ``nmod:poss`` edge to the owner are removed.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    noun_node = pattern.create_node(UPOS="NOUN|PROPN|PRON|X|NUM|SYM")
    owner_node = pattern.create_node()
    whose_node = pattern.create_node(LEMMA="whose")

    pattern.add_dependency(noun_node, owner_node, "nmod:poss")
    pattern.add_dependency(owner_node, whose_node, "ref")

    # Collect all matches before touching the graph.
    whose_noun_phrase = [
        (match[owner_node], match[whose_node], match[noun_node])
        for match in dep_graph.match(pattern)
    ]

    for owner, whose, noun in whose_noun_phrase:
        merged_noun = merge_dep_nodes([whose, noun],
                                      UPOS=noun.UPOS,
                                      LOC=noun.LOC)

        # The edges are removed between the matched graph nodes (`owner`);
        # `owner_node` is only the placeholder created on the pattern graph.
        dep_graph.remove_dependency(owner, whose)
        dep_graph.remove_dependency(noun, owner, "nmod:poss")

        dep_graph.replace_nodes([whose, noun], merged_noun)
def adj_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Turn every ``amod`` dependency into an OIA modifier relation.

    An adjective in front of the noun is handled by the noun phrase
    processing; this converter covers the adjective placed behind the noun.

    Example: a pretty little boy

    :param dep_graph: dependency graph searched for ``amod`` edges
    :param oia_graph: OIA graph that receives the ``mod`` relations
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()
    head = pattern.create_node()  # UPOS="NOUN|PRON|PROPN")
    adjective = pattern.create_node()  # UPOS="ADJ|NOUN")
    pattern.add_dependency(head, adjective, r'amod')

    for found in dep_graph.match(pattern):
        dep_head, dep_adjective = found[head], found[adjective]

        oia_head = oia_graph.add_words(dep_head.position)
        oia_adjective = oia_graph.add_words(dep_adjective.position)

        # Trace the positions and the OIA nodes created for them.
        for item in ("adj_modifier: ",
                     dep_head.position, oia_head,
                     dep_adjective.position, oia_adjective):
            logger.debug(item)

        oia_graph.add_mod(oia_adjective, oia_head)
def part(dep_graph: DependencyGraph):
    """
    Merge a PART token attached by ``advmod`` into its AUX/VERB parent.

    The two nodes are ordered by LOC and merged into one node that keeps the
    parent's UPOS, LOC and FEATS.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()
    head = pattern.create_node(UPOS="AUX|VERB")
    particle = pattern.create_node(UPOS="PART")
    pattern.add_dependency(head, particle, r'advmod')

    # The matches are materialized first; the loop body edits dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_head = found[head]
        dep_particle = found[particle]

        ordered = sorted([dep_head, dep_particle], key=lambda n: n.LOC)
        merged = merge_dep_nodes(ordered,
                                 UPOS=dep_head.UPOS,
                                 LOC=dep_head.LOC,
                                 FEATS=dep_head.FEATS)
        dep_graph.replace_nodes(ordered, merged)
def separated_asas(dep_graph: DependencyGraph):
    """
    Merge the two halves of a separated "as ... as" comparison.

    Equality comparison: A is as X a C as B

    * the first 'as' is always the advmod of a following element, X, which
      is within the range of as ... as
    * the second 'as' is always the dependent of B
    * B sometimes depends on the first 'as', sometimes depends on X
    * sometimes X has a head that is also within the range of as ... as

    For a match in the surface order as1 < adj < noun < as2 < obj, all graph
    nodes located from the first "as" up to the adjective plus the second
    "as" are merged into one ADJ node; the noun is then linked to the
    compared object with an ``acl:<merged form>`` dependency.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    adjective = DependencyGraphNode(UPOS="ADJ")
    noun = DependencyGraphNode(UPOS="NOUN")
    first_as = DependencyGraphNode(FORM="as")
    second_as = DependencyGraphNode(FORM="as")
    compared = DependencyGraphNode()

    pattern.add_nodes([noun, adjective, first_as, second_as, compared])

    pattern.add_dependency(noun, adjective, r'amod')
    pattern.add_dependency(adjective, first_as, r'\w*advmod\w*')
    pattern.add_dependency(first_as, compared, r'\w*advcl:as\w*')
    pattern.add_dependency(compared, second_as, r'mark')

    # First pass: build the merged nodes; the graph is rewired afterwards.
    pending = []
    for found in dep_graph.match(pattern):
        dep_noun = found[noun]
        dep_adjective = found[adjective]
        dep_first_as = found[first_as]
        dep_second_as = found[second_as]
        dep_compared = found[compared]

        in_expected_order = (dep_first_as.LOC < dep_adjective.LOC
                             < dep_noun.LOC < dep_second_as.LOC
                             < dep_compared.LOC)
        if not in_expected_order:
            continue

        span = [
            node for node in dep_graph.nodes()
            if dep_first_as.LOC <= node.LOC <= dep_adjective.LOC
        ]
        span.append(dep_second_as)
        span.sort(key=lambda n: n.LOC)

        merged = merge_dep_nodes(span, UPOS="ADJ", LOC=dep_second_as.LOC)
        pending.append((span, merged, dep_noun, dep_compared))

    # Second pass: swap in the merged node and re-attach the compared object.
    for span, merged, dep_noun, dep_compared in pending:
        dep_graph.replace_nodes(span, merged)
        dep_graph.remove_dependency(merged, dep_compared)
        dep_graph.remove_dependency(dep_noun, merged)
        dep_graph.add_dependency(dep_noun, dep_compared,
                                 "acl:" + merged.FORM)
def acl_mod_verb(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Attach a verb that hangs off a nominal through ``acl`` as an OIA modifier.

    This is called after adnominal_clause_mark, which means there is no mark.

    :param dep_graph: dependency graph searched for ``acl`` edges
    :param oia_graph: OIA graph that receives the ``mod`` relations
    :param context: conversion context, consulted via ``is_processed``
    :return: None
    :raises AssertionError: if an edge carries more than one ``acl:`` sub-label
    """
    pattern = DependencyGraph()

    # ADJ is for the cases that "many/some" are abbrv of many X/some X,
    # representing NOUN; ADV is for the case of "here" in
    # "i am here thinking xxx"
    nominal = pattern.create_node(UPOS="NOUN|PRON|PROPN|ADJ|ADV|NUM")
    # AUX is for can/have, which omit the true verb
    clause_verb = pattern.create_node(UPOS="VERB|AUX")

    pattern.add_nodes([nominal, clause_verb])
    pattern.add_dependency(nominal, clause_verb, r'acl')

    for found in dep_graph.match(pattern):
        dep_nominal = found[nominal]
        dep_clause_verb = found[clause_verb]

        if context.is_processed(dep_nominal, dep_clause_verb) or \
                oia_graph.has_relation(dep_nominal, dep_clause_verb,
                                       direct_link=False):
            continue

        oia_clause_verb = oia_graph.add_words(dep_clause_verb.position)
        oia_nominal = oia_graph.add_words(dep_nominal.position)

        edge = dep_graph.get_dependency(dep_nominal, dep_clause_verb)
        sub_labels = [name for name in edge.rels if name.startswith("acl:")]
        if sub_labels:
            assert len(sub_labels) == 1

        # The acl sub-label is not turned into a predicate (that alternative
        # is disabled); the verb is always attached as a plain modifier.
        oia_graph.add_mod(oia_clause_verb, oia_nominal)
def be_adp_phrase(dep_graph: DependencyGraph):
    """
    Merge a copula with the adposition(s) that follow it, e.g. "is for xxx".

    This should not be applied:
    1. if xxx is adj, then be_adj_verb will be applied;
    2. if xxx is NOUN, then copula_phrase will be applied.

    Note that there may be multiple adp: the insurgency is out of the picture

    Matches ``some -cop-> AUX`` with ``some -case-> ADP``. When every ADP
    ``case`` child of ``some`` lies strictly between the copula and ``some``,
    the copula and those adpositions are merged into one AUX node that is
    re-attached to ``some`` as ``cop``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    target = pattern.create_node()
    adposition = pattern.create_node(UPOS="ADP")
    copula = pattern.create_node(UPOS="AUX")

    pattern.add_dependency(target, copula, r'cop')
    pattern.add_dependency(target, adposition, r'case')

    # First pass: collect (target, [copula + adpositions], copula) triples.
    candidates = []
    for found in dep_graph.match(pattern):
        dep_copula = found[copula]
        dep_target = found[target]

        dep_adpositions = [
            child for child, label in dep_graph.children(
                dep_target,
                filter=lambda n, l: "case" in l and n.UPOS == "ADP")
        ]

        misplaced = any(
            not (dep_copula.LOC < adp.LOC < dep_target.LOC)
            for adp in dep_adpositions)
        if misplaced:
            continue

        candidates.append(
            (dep_target, [dep_copula] + dep_adpositions, dep_copula))

    # Second pass: merge and rewire.
    for dep_target, members, anchor in candidates:
        still_present = all(dep_graph.get_node(m.ID) for m in members)
        if not still_present:
            continue  # has been processed

        merged = merge_dep_nodes(members, UPOS="AUX", LOC=anchor.LOC)

        for member in members:
            dep_graph.remove_dependency(dep_target, member)

        dep_graph.replace_nodes(members, merged)
        dep_graph.add_dependency(dep_target, merged, "cop")
def amod_obl(dep_graph: DependencyGraph):
    """
    Merge an adjective with the adposition of its oblique, e.g. "more than",
    "successful by".

    Matches ``noun -amod-> adj``, ``adj -obl:x-> obl`` and ``obl -case-> adp``.
    A match is kept when the adjective has at most one ``obl`` child, the
    adposition's FORM appears among the values of the adj-obl dependency, and
    the surface order is noun < adj < adp < obl. The adjective and the
    adposition are then merged into one ADP node and the noun is linked to
    the oblique with ``nmod:<merged form>``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    noun = DependencyGraphNode(UPOS=r"NOUN|PRON")
    adjective = DependencyGraphNode(UPOS="ADJ")
    adposition = DependencyGraphNode(UPOS="ADP")
    oblique = DependencyGraphNode()

    pattern.add_nodes([noun, adjective, adposition, oblique])

    pattern.add_dependency(noun, adjective, r'amod')
    pattern.add_dependency(adjective, oblique, r'obl:\w+')
    pattern.add_dependency(oblique, adposition, r'case')

    selected = []
    for found in dep_graph.match(pattern):
        dep_noun = found[noun]
        dep_adjective = found[adjective]
        dep_oblique = found[oblique]
        dep_adposition = found[adposition]

        obl_children = list(
            dep_graph.children(dep_adjective, filter=lambda n, l: "obl" in l))
        if len(obl_children) > 1:
            # similar in form to the one
            continue

        edge_values = dep_graph.get_dependency(dep_adjective,
                                               dep_oblique).values()
        if dep_adposition.FORM not in edge_values:
            continue

        if dep_noun.LOC < dep_adjective.LOC < dep_adposition.LOC \
                < dep_oblique.LOC:
            selected.append(
                (dep_noun, dep_adjective, dep_oblique, dep_adposition))

    for dep_noun, dep_adjective, dep_oblique, dep_adposition in selected:
        merged = merge_dep_nodes([dep_adjective, dep_adposition],
                                 UPOS="ADP",
                                 LOC=dep_adposition.LOC)

        dep_graph.remove_dependency(dep_noun, dep_adjective)
        dep_graph.remove_dependency(dep_adjective, dep_oblique)
        dep_graph.replace_nodes([dep_adjective, dep_adposition], merged)
        dep_graph.add_dependency(dep_noun, dep_oblique,
                                 "nmod:" + merged.FORM)
def and_or(dep_graph: DependencyGraph):
    """
    Merge a coordinated "and ... or" pair of conjunctions into one node.

    Matches ``parent -conj:x-> some`` where ``some`` has both an "and" and an
    "or" ``cc`` child and ``and -conj-> or``. The conjunctions, together with
    every node located between them, are merged unless one of the nodes in
    between is a VERB, NOUN, ADJ, ADP or ADV. The parent-some dependency is
    then set to ``conj:<merged form>``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    parent = pattern.create_node()
    conjunct = pattern.create_node()
    and_word = pattern.create_node(LEMMA=r"\band\b")
    or_word = pattern.create_node(LEMMA=r"\bor\b")

    pattern.add_dependency(parent, conjunct, r'\bconj:\w*')
    pattern.add_dependency(conjunct, and_word, r'\bcc\b')
    pattern.add_dependency(conjunct, or_word, r'\bcc\b')
    pattern.add_dependency(and_word, or_word, r'\bconj')

    content_tags = {"VERB", "NOUN", "ADJ", "ADP", "ADV"}

    # The matches are materialized first; the loop body edits dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_parent = found[parent]
        dep_conjunct = found[conjunct]
        dep_and = found[and_word]
        dep_or = found[or_word]

        rel = dep_graph.get_dependency(dep_parent, dep_conjunct)
        if not (rel.startswith("conj:and") or rel.startswith("conj:or")):
            continue

        members = [
            node for node in dep_graph.nodes()
            if dep_and.LOC < node.LOC < dep_or.LOC
        ]
        if any([node.UPOS in content_tags for node in members]):
            continue

        members.extend([dep_and, dep_or])
        members.sort(key=lambda n: n.LOC)

        # Skip when any member is no longer found in the graph.
        if not all([dep_graph.get_node(node.ID) for node in members]):
            continue

        merged = merge_dep_nodes(members,
                                 UPOS=dep_and.UPOS,
                                 LOC=dep_and.LOC,
                                 FEATS=dep_and.FEATS)
        dep_graph.replace_nodes(members, merged)
        dep_graph.set_dependency(dep_parent, dep_conjunct,
                                 "conj:" + merged.FORM)
def adverbial_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                     context: UD2OIAContext):
    """
    Convert adverbial clauses (``advcl``) into OIA relations.

    * run in order to catch it. -- advcl with mark (in order to)
    * he worked hard, replacing his feud. -- advcl without mark

    With a ``mark`` child, the marker becomes a predicate taking the verb as
    argument 1 (``mod=True``) and the clause as argument 2; without one, the
    clause is attached to the verb as a plain modifier.

    :param dep_graph: dependency graph searched for ``advcl`` edges
    :param oia_graph: OIA graph that receives the relations
    :param context: conversion context, consulted via ``is_processed``
    :return: None
    """
    pattern = DependencyGraph()
    head = pattern.create_node()
    clause = pattern.create_node()
    pattern.add_dependency(head, clause, "advcl")

    for found in list(dep_graph.match(pattern)):
        dep_head = found[head]
        dep_clause = found[clause]

        if context.is_processed(dep_head, dep_clause):
            continue

        oia_head = oia_graph.add_words(dep_head.position)
        oia_clause = oia_graph.add_words(dep_clause.position)

        logger.debug("adverbial clause: verb={0}, modifier={1}".format(
            dep_head.position, dep_clause.position))

        if oia_graph.has_relation(oia_head, oia_clause):
            continue

        markers = list(
            dep_graph.children(dep_clause,
                               filter=lambda n, rel: "mark" in rel))

        if not markers:
            oia_graph.add_mod(oia_clause, oia_head)
            continue

        # Only the first marker returned by children() is used.
        dep_marker, _ = markers[0]
        oia_marker = oia_graph.add_words(dep_marker.position)
        if oia_marker is None:
            continue

        # NOTE(review): `language` is not defined inside this function;
        # presumably a module-level name -- confirm.
        if dep_marker.LEMMA in CONJUNCTION_WORDS[language]:
            continue

        oia_graph.add_argument(oia_marker, oia_head, 1, mod=True)
        oia_graph.add_argument(oia_marker, oia_clause, 2)
def multi_word_sconj(dep_graph: DependencyGraph):
    """
    Merge a multi-word subordinating conjunction into a single mark node.

    Matches ``VERB -advcl:x-> VERB`` with ``verb2 -mark-> SCONJ`` where the
    marker's LEMMA appears among the values of the advcl dependency. The
    nodes returned by ``offsprings`` for the marker are merged (skipped when
    there is exactly one) and the ``advcl:<lemma>`` / ``mark`` edges are
    rebuilt around the merged node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    main_verb = pattern.create_node(UPOS="VERB")
    clause_verb = pattern.create_node(UPOS="VERB")
    marker = pattern.create_node(UPOS="SCONJ")

    pattern.add_dependency(main_verb, clause_verb, r'advcl:\w*')
    pattern.add_dependency(clause_verb, marker, r'mark')

    # First pass: collect the marker phrases to merge.
    collected = []
    for found in dep_graph.match(pattern):
        dep_main = found[main_verb]
        dep_clause = found[clause_verb]
        dep_marker = found[marker]

        advcl_values = dep_graph.get_dependency(dep_main, dep_clause).values()
        if dep_marker.LEMMA not in advcl_values:
            continue

        phrase = sorted(dep_graph.offsprings(dep_marker),
                        key=lambda n: n.LOC)
        if len(phrase) == 1:
            continue

        collected.append((dep_main, dep_clause, dep_marker, phrase))

    # Second pass: merge and rewire.
    for dep_main, dep_clause, dep_marker, phrase in collected:
        # Skip when any member is no longer found in the graph.
        if not all([dep_graph.get_node(node.ID) for node in phrase]):
            continue

        dep_graph.remove_dependency(dep_clause, dep_marker)
        dep_graph.remove_dependency(dep_main, dep_clause)

        merged = merge_dep_nodes(phrase,
                                 UPOS=dep_marker.UPOS,
                                 LOC=dep_marker.LOC)
        dep_graph.replace_nodes(phrase, merged)

        dep_graph.add_dependency(dep_main, dep_clause,
                                 "advcl:" + merged.LEMMA)
        dep_graph.add_dependency(dep_clause, merged, "mark")
def it_verb_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Handle the expletive "it" of a verb: "it is xxx to do".

    Matches a VERB with an ``expl`` child "it" and an ``nsubj|csubj`` child.
    Both "it" and the subject become arguments of the verb with the same
    index (1 when "it" precedes the subject, 2 otherwise) and a ref relation
    is added from "it" to the subject. Both dependencies are then marked as
    processed in the context.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the relations
    :param context: conversion context, read and updated
    :return: None
    """
    pattern = DependencyGraph()

    expletive = pattern.create_node(LEMMA="it")
    verb = pattern.create_node(UPOS="VERB")
    subject = pattern.create_node(UPOS="NOUN|PRON|PROPN|VERB")

    pattern.add_dependency(verb, expletive, r'expl')
    pattern.add_dependency(verb, subject, r'nsubj|csubj')

    for found in dep_graph.match(pattern):
        dep_verb = found[verb]
        dep_expletive = found[expletive]
        dep_subject = found[subject]

        if context.is_processed(dep_verb, dep_expletive):
            continue

        oia_expletive = oia_graph.add_words(dep_expletive.position)
        oia_subject = oia_graph.add_words(dep_subject.position)
        oia_verb = oia_graph.add_words(dep_verb.position)

        # "it VERB subj that ..." -> argument 1
        # "subj VERB it that ..." -> argument 2
        slot = 1 if dep_expletive.LOC < dep_subject.LOC else 2

        oia_graph.add_argument(oia_verb, oia_expletive, slot)
        oia_graph.add_argument(oia_verb, oia_subject, slot)
        oia_graph.add_ref(oia_expletive, oia_subject)

        context.processed(dep_verb, dep_expletive)
        context.processed(dep_verb, dep_subject)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                   context: UD2OIAContext):
    """
    Convert an ``nmod`` dependent that carries a ``case`` marker into a
    predicate relation.

    Examples: the office of the chair; Istanbul in Turkey

    For every ``parent -nmod-> child`` with ``child -case-> marker`` for
    which the OIA graph reports no relation between parent and child, the
    case word becomes a predicate with the parent as argument 1 (added with
    ``mod=True``) and the child as argument 2.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the relations
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()

    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    pattern.add_nodes([parent_node, child_node, case_node])

    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):
        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        # The nmod sub-label may follow either the LEMMA or the FORM of the
        # case word (vs / versus, according / accord), so it is not reliable;
        # the predicate is always built from the case word itself. The former
        # if/else on the label executed the same statement in both branches.
        pred_node = oia_graph.add_words(dep_case_node.position)

        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                         context: UD2OIAContext):
    """
    Convert oblique dependents (``obl``, ``obl:tmod``, ``obl:npmod``).

    Example: cut X by a knife

    When "tmod" is among the values of the dependency, an auxiliary
    ``TIME_IN`` predicate links the head (argument 1, ``mod=True``) and the
    oblique (argument 2). Otherwise the oblique is attached to the head as a
    plain modifier.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the relations
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()

    head = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    oblique = DependencyGraphNode()

    pattern.add_node(head)
    pattern.add_node(oblique)
    pattern.add_dependency(head, oblique, r'obl:tmod|obl:npmod|obl')

    for found in dep_graph.match(pattern):
        dep_head = found[head]
        dep_oblique = found[oblique]

        if oia_graph.has_relation(dep_head, dep_oblique, direct_link=False):
            continue

        edge_values = dep_graph.get_dependency(dep_head,
                                               dep_oblique).values()

        if "tmod" not in edge_values:
            # "npmod" and every other oblique: plain modifier
            oia_head = oia_graph.add_words(dep_head.position)
            oia_oblique = oia_graph.add_words(dep_oblique.position)
            oia_graph.add_mod(oia_oblique, oia_head)
            continue

        # temporal oblique: introduce the TIME_IN predicate
        time_pred = oia_graph.add_aux("TIME_IN")
        oia_head = oia_graph.add_words(dep_head.position)
        oia_oblique = oia_graph.add_words(dep_oblique.position)
        oia_graph.add_argument(time_pred, oia_head, 1, mod=True)
        oia_graph.add_argument(time_pred, oia_oblique, 2)
def xcomp_verb(dep_graph: DependencyGraph):
    """
    Merge the "to" marker of an ``xcomp`` verb into that verb.

    Matches ``pred -xcomp-> VERB|AUX`` with ``verb -mark-> PART``. Markers
    whose lemma is not "to" are ignored; otherwise the marker and the verb
    are merged into one VERB node located at the verb's LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises Exception: if the marker follows the verb, or the verb has more
        than one ``xcomp`` parent
    """
    pattern = DependencyGraph()

    governor = pattern.create_node()
    comp_verb = pattern.create_node(UPOS="VERB|AUX")
    comp_mark = pattern.create_node(UPOS="PART")

    pattern.add_dependency(governor, comp_verb, "xcomp")
    pattern.add_dependency(comp_verb, comp_mark, "mark")

    # The matches are materialized first; the loop body edits dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_comp_verb = found[comp_verb]
        dep_comp_mark = found[comp_mark]

        if dep_comp_mark.LEMMA != "to":
            continue

        if dep_comp_mark.LOC > dep_comp_verb.LOC:
            raise Exception(
                "Unexpected Situation: xcomp mark after the xcomp verb")

        xcomp_parents = list(
            dep_graph.parents(dep_comp_verb,
                              filter=lambda n, l: "xcomp" in l))
        if len(xcomp_parents) > 1:
            raise Exception(
                "Unexpected Situation: Multiple xcomp parents found")

        phrase = [dep_comp_mark, dep_comp_verb]
        merged = merge_dep_nodes(phrase, UPOS="VERB", LOC=dep_comp_verb.LOC)
        dep_graph.replace_nodes(phrase, merged)
def such_that(dep_graph: DependencyGraph):
    """
    Merge a separated "such ... that" into one subordinating conjunction.

    Example: such a high price that

    Matches ``NOUN -det:predet-> such``, ``such -advcl:that-> VERB`` and
    ``verb -mark-> that`` in the surface order such < noun < that < verb.
    A merged SCONJ node is added, the noun is linked to the clause verb with
    ``advcl:<merged form>``, the merged node becomes the verb's ``mark``, and
    the original "such" and "that" nodes are removed.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    noun = DependencyGraphNode(UPOS="NOUN")
    such_word = DependencyGraphNode(FORM="such")
    clause_verb = DependencyGraphNode(UPOS="VERB")
    that_word = DependencyGraphNode(FORM="that")

    pattern.add_nodes([noun, such_word, clause_verb, that_word])

    pattern.add_dependency(noun, such_word, r'det:predet')
    pattern.add_dependency(such_word, clause_verb, r'advcl:that')
    pattern.add_dependency(clause_verb, that_word, r'mark')

    selected = []
    for found in dep_graph.match(pattern):
        dep_noun = found[noun]
        dep_such = found[such_word]
        dep_clause_verb = found[clause_verb]
        dep_that = found[that_word]

        if dep_such.LOC < dep_noun.LOC < dep_that.LOC < dep_clause_verb.LOC:
            selected.append((dep_noun, dep_such, dep_clause_verb, dep_that))

    for dep_noun, dep_such, dep_clause_verb, dep_that in selected:
        merged = merge_dep_nodes([dep_such, dep_that],
                                 UPOS="SCONJ",
                                 LOC=dep_that.LOC)

        dep_graph.add_node(merged)
        dep_graph.add_dependency(dep_noun, dep_clause_verb,
                                 "advcl:" + merged.FORM)
        dep_graph.add_dependency(dep_clause_verb, merged, "mark")

        dep_graph.remove_node(dep_such)
        dep_graph.remove_node(dep_that)
def adv_question(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                 context: UD2OIAContext):
    """
    Re-root a verb under its question word (how/what/where/when/why...).

    Matches ``VERB|AUX -advmod|amod-> question word``. The verb-to-question
    relation is removed, every non-``mod`` relation coming into the verb is
    moved onto the question word, and the question word is linked to the
    verb through ``add_function``.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph, modified in place
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()

    wh_word = pattern.create_node(
        UPOS="ADV|ADJ",
        LEMMA=r"(\bhow\b|\bwhat\b|\bwhere\b|\bwhen\b|why\b)\w*")
    verb = pattern.create_node(UPOS="VERB|AUX")

    pattern.add_dependency(verb, wh_word, "advmod|amod")

    for found in list(dep_graph.match(pattern)):
        dep_wh_word = found[wh_word]
        dep_verb = found[verb]

        oia_wh_word = oia_graph.add_words(dep_wh_word.position)
        oia_verb = oia_graph.add_words(dep_verb.position)

        oia_graph.remove_relation(oia_verb, oia_wh_word)

        # Iterate over a copy: relations of the verb are edited in the loop.
        incoming = list(oia_graph.parents(oia_verb))
        for source, relation in incoming:
            if not relation.mod:
                oia_graph.remove_relation(source, oia_verb)
                oia_graph.add_relation(source, oia_wh_word, relation)

        oia_graph.add_function(oia_wh_word, oia_verb)
def det_of_noun(dep_graph: DependencyGraph):
    """
    Merge "<determiner> of <noun>" (any/some/all of noun) into one node.

    Matches ``DET -nmod:of-> noun`` with ``noun -case-> of``. When the noun
    has exactly one parent, the determiner, "of" and the noun are merged
    into a node that keeps the determiner's UPOS, FEATS and LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises AssertionError: if the noun's single parent is not the determiner
    """
    pattern = DependencyGraph()

    determiner = pattern.create_node(UPOS="DET")
    of_word = pattern.create_node(LEMMA="of")
    noun = pattern.create_node(UPOS="NOUN|PROPN|PRON|X|NUM|SYM")

    pattern.add_dependency(determiner, noun, "nmod:of")
    pattern.add_dependency(noun, of_word, "case")

    # The matches are materialized first; the loop body edits dep_graph.
    for found in list(dep_graph.match(pattern)):
        dep_determiner = found[determiner]
        dep_noun = found[noun]
        dep_of = found[of_word]

        if not all([dep_determiner, dep_noun, dep_of]):
            # processed by others
            continue

        if isinstance(dep_noun, DependencyGraphSuperNode) and dep_noun.is_conj:
            continue

        noun_parents = [node for node, _ in dep_graph.parents(dep_noun)]
        if len(noun_parents) != 1:
            continue

        # NOTE(review): the merge is assumed to apply only in the
        # single-parent case -- confirm against the original layout.
        assert noun_parents[0] == dep_determiner

        phrase = [dep_determiner, dep_of, dep_noun]
        merged = merge_dep_nodes(phrase,
                                 UPOS=dep_determiner.UPOS,
                                 FEATS=dep_determiner.FEATS,
                                 LOC=dep_determiner.LOC)
        dep_graph.replace_nodes(phrase, merged)
def be_not_phrase(dep_graph: DependencyGraph):
    """
    Merge a negation attached to the object into the preceding "be" node.

    Matches ``be -*obj*-> obj`` with ``obj -*advmod*-> not`` where "be" is
    one of the space-separated lemmas of the first node, "not" is one of the
    lemmas of the third, and the surface order is be <= not <= obj. The
    ``advmod`` edge is removed and "be" and "not" are merged into one node
    that keeps the UPOS and LOC of the "be" node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    be_word = pattern.create_node()  # contain the be verb
    obj_word = pattern.create_node()
    not_word = pattern.create_node()  # UPOS="PART" is not enforced

    pattern.add_node(be_word)
    pattern.add_node(obj_word)
    pattern.add_node(not_word)

    pattern.add_dependency(be_word, obj_word, r'\w*obj\w*')
    pattern.add_dependency(obj_word, not_word, r'\w*advmod\w*')

    selected = []
    for found in dep_graph.match(pattern):
        dep_be = found[be_word]
        dep_obj = found[obj_word]
        dep_not = found[not_word]

        if ("be" in dep_be.LEMMA.split(" ")
                and "not" in dep_not.LEMMA.split(" ")
                and dep_be.LOC <= dep_not.LOC <= dep_obj.LOC):
            selected.append((dep_be, dep_obj, dep_not))

    for dep_be, dep_obj, dep_not in selected:
        dep_graph.remove_dependency(dep_obj, dep_not, 'advmod')

        merged = merge_dep_nodes((dep_be, dep_not),
                                 UPOS=dep_be.UPOS,
                                 LOC=dep_be.LOC)
        dep_graph.replace_nodes([dep_be, dep_not], merged)
def it_be_adjv_that(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                    context: UD2OIAContext):
    """
    Handle the expletive "it" in "it is xxx that".

    Matches a VERB with an ``expl`` child "it" and a ``csubj`` child
    (ADJ|ADV) that has a ``mark`` child "that". "it" becomes argument 1 of
    the verb, a ref relation is added from the csubj node to "it", and the
    verb-it dependency is marked as processed.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the relations
    :param context: conversion context, read and updated
    :return: None
    """
    pattern = DependencyGraph()

    expletive = pattern.create_node(LEMMA="it")
    verb = pattern.create_node(UPOS="VERB")
    clausal_subject = pattern.create_node(UPOS="ADJ|ADV")
    that_word = pattern.create_node(LEMMA="that")

    pattern.add_dependency(verb, expletive, r'expl')
    pattern.add_dependency(verb, clausal_subject, r'csubj')
    pattern.add_dependency(clausal_subject, that_word, r'mark')

    for found in dep_graph.match(pattern):
        dep_verb = found[verb]
        dep_expletive = found[expletive]
        dep_clausal_subject = found[clausal_subject]

        if context.is_processed(dep_verb, dep_expletive):
            continue

        oia_expletive = oia_graph.add_words(dep_expletive.position)
        oia_clausal_subject = oia_graph.add_words(
            dep_clausal_subject.position)
        oia_verb = oia_graph.add_words(dep_verb.position)

        oia_graph.add_argument(oia_verb, oia_expletive, 1)
        oia_graph.add_ref(oia_clausal_subject, oia_expletive)

        context.processed(dep_verb, dep_expletive)
def nmod_without_case(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                      context: UD2OIAContext):
    """
    Attach ``nmod`` dependents to their head as plain OIA modifiers.

    An ``nmod:poss`` edge whose head is itself among the offsprings of the
    modifier is skipped (the "whose" case), as is any pair for which the OIA
    graph already reports a relation (queried with ``direct_link=False``).

    :param dep_graph: dependency graph searched for ``nmod`` edges
    :param oia_graph: OIA graph that receives the ``mod`` relations
    :param context: conversion context; not used by this converter
    :return: None
    """
    pattern = DependencyGraph()
    center = pattern.create_node()
    modifier = pattern.create_node()
    pattern.add_dependency(center, modifier, r'\w*nmod\w*')

    for found in dep_graph.match(pattern):
        dep_center = found[center]
        dep_modifier = found[modifier]

        rels = dep_graph.get_dependency(dep_center, dep_modifier)

        # whose in there
        is_whose_cycle = ("nmod:poss" in rels and
                          dep_center in set(
                              dep_graph.offsprings(dep_modifier)))
        if is_whose_cycle:
            continue

        if oia_graph.has_relation(dep_center, dep_modifier,
                                  direct_link=False):
            continue

        oia_center = oia_graph.add_words(dep_center.position)
        oia_modifier = oia_graph.add_words(dep_modifier.position)
        oia_graph.add_mod(oia_modifier, oia_center)
def num_pair(dep_graph: DependencyGraph):
    """
    Merge "<number> <connector> <number>" into a single NOUN node.

    Matches ``NUM -nmod-> NUM`` with ``num2 -case-> connector`` where the
    connector's lemma is ``--``, ``-`` or ``by``. When the connector lies
    strictly between the two numbers, the three nodes are merged in surface
    order into a NOUN node located at the last node's LOC.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    first_num = pattern.create_node(UPOS="NUM")
    second_num = pattern.create_node(UPOS="NUM")
    connector = pattern.create_node(LEMMA=r"--|-|by")

    pattern.add_dependency(first_num, second_num, r'nmod')
    pattern.add_dependency(second_num, connector, r'case')

    intervals = []
    for found in dep_graph.match(pattern):
        dep_first = found[first_num]
        dep_second = found[second_num]
        dep_connector = found[connector]

        connector_between = (
            dep_first.LOC < dep_connector.LOC < dep_second.LOC
            or dep_second.LOC < dep_connector.LOC < dep_first.LOC)
        if connector_between:
            intervals.append(
                sorted([dep_first, dep_connector, dep_second],
                       key=lambda n: n.LOC))

    for members in intervals:
        merged = merge_dep_nodes(members,
                                 UPOS="NOUN",
                                 LOC=members[-1].LOC)
        dep_graph.replace_nodes(members, merged)
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert an object-extracted/referred relative clause into OIA relations.

    Example: the person that Andy knows

    Matches ``entity -acl:relcl-> verb`` with ``verb -*subj*-> subj``.
    Matches whose subject lemma is what/who/which/that are skipped. For the
    rest, the subject becomes argument 1 of the verb, a ``ref`` child of the
    entity located between the entity and the verb (the right-most one, if
    any) is linked to the entity and to the verb, the entity becomes
    argument 2 of the verb (``mod=True``), and every ``ccomp`` child of the
    verb becomes argument 3.

    NOTE(review): this block was reconstructed from a flattened source; the
    nesting of the statements after the ``raise`` is assumed to be at loop
    level -- confirm against the original layout.

    :param dep_graph: dependency graph searched for the pattern
    :param oia_graph: OIA graph that receives the relations
    :param context: conversion context, read and updated
    :return: None
    :raises Exception: if the verb-ref dependency is neither an ``obj``-like
        relation nor ``advmod`` and the ref node has no ``case`` child
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()
    pattern.add_nodes([verb_node, entity_node, subj_node])
    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):

        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # a relative pronoun as subject is not the object-extracted case
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        # both dependencies are marked before the OIA graph is consulted
        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        # keep only "ref" children located between the entity and the verb
        def __valid_ref(n, l):
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node, filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # the right-most valid ref node is used
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            case_nodes.sort(key=lambda x: x.LOC)

            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node, oia_verb_node, 1, mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                else:
                    # no case marker: link the ref node according to its
                    # relation to the verb
                    if "obj" in ref_relation:
                        oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                    elif ref_relation == "advmod":
                        oia_graph.add_mod(oia_ref_node, oia_verb_node)
                    else:
                        raise Exception(
                            "unknown relation: {}".format(ref_relation))
                    # oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # the subject argument is added a second time here, as in the original
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # `rels` is only referenced by the disabled check below
        rels = dep_graph.get_dependency(dep_entity_node, dep_verb_node)

        #if rels.endswith("obj"):
        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
def noun_of_noun(dep_graph: DependencyGraph):
    """
    Merge "<noun1> of <noun2>" into a single noun node.

    The searched pattern is ``noun1 --nmod:of--> noun2 --case--> of``. A
    match is merged only when noun2 has no "conj"/"acl" relation to any
    child or parent, neither noun is a conjunction super node, and noun2
    has exactly one parent. The merged node takes UPOS, FEATS and LOC from
    noun1 and replaces the three matched nodes in ``dep_graph``.

    Matches are collected before the graph is modified; nodes that were
    merged by an earlier match are looked up through a replacement map so
    that later matches operate on the merged node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises Exception: when the only parent of noun2 is not noun1
    """
    pattern = DependencyGraph()
    noun1_node = pattern.create_node(UPOS="NOUN|PROPN|PRON|X|NUM|SYM")
    of_node = pattern.create_node(LEMMA="of")
    noun2_node = pattern.create_node(UPOS="NOUN|PROPN|PRON|X|NUM|SYM")
    pattern.add_dependency(noun1_node, noun2_node, "nmod:of")
    pattern.add_dependency(noun2_node, of_node, "case")

    def _is_complex(rel):
        # Relations that mark a coordination or a clausal modifier.
        return "conj" in rel or "acl" in rel

    def _is_conj_super_node(node):
        return isinstance(node, DependencyGraphSuperNode) and node.is_conj

    # Original node -> merged node that replaced it.
    replaced_by = {}

    for match in list(dep_graph.match(pattern)):
        head_noun = match[noun1_node]
        head_noun = replaced_by.get(head_noun, head_noun)
        tail_noun = match[noun2_node]
        tail_noun = replaced_by.get(tail_noun, tail_noun)
        of_word = match[of_node]

        if not (head_noun and tail_noun and of_word):
            # processed by others
            continue

        if any(_is_complex(rel) for _, rel in dep_graph.children(tail_noun)):
            continue
        if any(_is_complex(rel) for _, rel in dep_graph.parents(tail_noun)):
            continue

        if _is_conj_super_node(head_noun) or _is_conj_super_node(tail_noun):
            continue

        tail_parents = [parent for parent, _ in dep_graph.parents(tail_noun)]
        if len(tail_parents) != 1:
            continue

        sole_parent = tail_parents[0]
        if sole_parent != head_noun:
            logger.error("dep_noun1 {0} {1}".format(
                head_noun.ID, head_noun.FORM))
            logger.error("dep_noun2 {0} {1}".format(
                tail_noun.ID, tail_noun.FORM))
            logger.error("dep_noun2_parent {0} {1}".format(
                sole_parent.ID, sole_parent.FORM))
            raise Exception("Noun of Noun failed")

        phrase_nodes = [head_noun, of_word, tail_noun]
        merged_noun = merge_dep_nodes(phrase_nodes,
                                      UPOS=head_noun.UPOS,
                                      FEATS=head_noun.FEATS,
                                      LOC=head_noun.LOC)
        dep_graph.replace_nodes(phrase_nodes, merged_noun)

        replaced_by.update(dict.fromkeys(phrase_nodes, merged_noun))

        logger.debug("node merged :" + " ".join(
            [head_noun.ID, of_word.ID, tail_noun.ID]))
def amod_xcomp_to_acl(dep_graph: DependencyGraph):
    """
    Rewrite ``noun --amod--> adj --xcomp--> verb`` as an ``acl`` clause.

    For each match the adjective, together with any ``mark`` words of its
    xcomp children located between the adjective and that xcomp child, is
    merged into a new VERB node prefixed with the literal "(be)". The new
    node is attached to the noun with "acl" and the matched verb is
    attached to the new node with "obj".

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises Exception: when more than one xcomp child contributes mark words
    """
    pattern = DependencyGraph()
    noun_node = pattern.create_node(UPOS="NOUN")
    adj_node = pattern.create_node(UPOS="ADJ")
    verb_node = pattern.create_node(UPOS="VERB")
    pattern.add_dependency(noun_node, adj_node, r'amod')
    pattern.add_dependency(adj_node, verb_node, r"xcomp")

    for match in list(dep_graph.match(pattern)):
        head_noun = match[noun_node]
        comp_verb = match[verb_node]
        adjective = match[adj_node]

        try:
            # Probe that all three nodes can still be looked up.
            for matched in (head_noun, comp_verb, adjective):
                dep_graph.get_node(matched.ID)
        except Exception:
            # has been processed by previous match
            continue

        xcomp_children = [
            child for child, _ in dep_graph.children(
                adjective, filter=lambda n, l: l.startswith("xcomp"))
        ]

        def _marks_before(xcomp_child):
            # mark words of xcomp_child lying between adjective and it
            def _is_inner_mark(n, l):
                return (l.startswith("mark")
                        and adjective.LOC < n.LOC < xcomp_child.LOC)

            return [
                n for n, _ in dep_graph.children(xcomp_child,
                                                 filter=_is_inner_mark)
            ]

        marks_per_xcomp = []
        for xcomp_child in xcomp_children:
            found_marks = _marks_before(xcomp_child)
            if found_marks:
                marks_per_xcomp.append(found_marks)

        if len(marks_per_xcomp) > 1:
            raise Exception("Unexpected Situation Happened")

        extra_marks = marks_per_xcomp[0] if marks_per_xcomp else []
        ordered_parts = sorted([adjective, *extra_marks],
                               key=lambda x: x.LOC)
        phrase_parts = ["(be)"] + ordered_parts

        merged_verb = merge_dep_nodes(phrase_parts,
                                      UPOS="VERB",
                                      LOC=phrase_parts[-1].LOC,
                                      FEATS={"VerbForm": "Ger"})

        dep_graph.replace_nodes(phrase_parts, merged_verb)
        dep_graph.set_dependency(head_noun, merged_verb, "acl")

        # Merging a mark word leaves an edge from its xcomp parent to the
        # merged node; drop those edges.
        for xcomp_child in xcomp_children:
            dep_graph.remove_dependency(xcomp_child, merged_verb)

        dep_graph.set_dependency(merged_verb, comp_verb, "obj")
def acl_verb_obl_case(dep_graph: DependencyGraph):
    """
    Fold the case word(s) of a lone oblique into an ``acl`` verb.

    The searched pattern is
    ``head --acl--> verb --obl:*--> obj --case--> case``. A match is kept
    only when

    * the verb has at most one child whose relation starts with "obl",
    * the verb has no child whose relation contains "obj" or "comp",
    * the FORM of the matched case word is among the values of the
      verb->obj relation, and
    * the verb has at most one child whose relation contains "subj".

    For each kept match the verb and all case words of obj located between
    the verb and obj are merged into one verb node (UPOS, LOC and FEATS
    taken from the verb), and obj is re-attached to it as "obj".

    Matches are collected first and the graph is rewritten afterwards, so
    the match iteration never sees a partially rewritten graph.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()
    subj_node = pattern.create_node()
    verb_node = pattern.create_node(UPOS="VERB")
    obj_node = pattern.create_node()
    case_node = pattern.create_node()
    pattern.add_dependency(subj_node, verb_node, r'acl')
    pattern.add_dependency(verb_node, obj_node, r'obl:\w*')
    pattern.add_dependency(obj_node, case_node, r'case')

    # NOTE(review): one verb can presumably be queued more than once (e.g.
    # several matching case words); confirm how replace_nodes behaves then.
    phrases = []
    for match in dep_graph.match(pattern):
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]
        dep_obj_node = match[obj_node]
        dep_case_node = match[case_node]

        obl_nodes = [
            n for n, l in dep_graph.children(
                dep_verb_node, filter=lambda n, l: l.startswith("obl"))
        ]
        if len(obl_nodes) > 1:
            continue

        existing_obj_nodes = [
            n for n, l in dep_graph.children(
                dep_verb_node, filter=lambda n, l: "obj" in l or "comp" in l)
        ]
        if existing_obj_nodes:
            continue

        obl_rel = dep_graph.get_dependency(dep_verb_node, dep_obj_node)
        if dep_case_node.FORM not in obl_rel.values():
            continue

        # there may be other case words; join all that sit between the verb
        # and its oblique
        dep_case_nodes = [
            n for n, l in dep_graph.children(
                dep_obj_node,
                filter=lambda n, l: l.startswith("case") and
                dep_verb_node.LOC < n.LOC < dep_obj_node.LOC)
        ]

        subjs = list(
            dep_graph.children(dep_verb_node, filter=lambda n, l: "subj" in l))
        if len(subjs) > 1:
            continue

        phrases.append(
            (dep_subj_node, dep_verb_node, dep_obj_node, dep_case_nodes))

    for dep_subj_node, dep_verb_node, dep_obj_node, dep_case_nodes in phrases:
        new_verb_phrase = [dep_verb_node] + dep_case_nodes

        # Use the module-level logger, as the other converters in this file
        # do, instead of logging on the root logger.
        logger.debug("acl_verb_obl_case: we are merging nodes")
        logger.debug("\n".join(str(node) for node in new_verb_phrase))

        new_verb_node = merge_dep_nodes(new_verb_phrase,
                                        UPOS=dep_verb_node.UPOS,
                                        LOC=dep_verb_node.LOC,
                                        FEATS=dep_verb_node.FEATS)

        logger.debug("acl_verb_obl_case: we obtain a new node")
        logger.debug(str(new_verb_node))

        # Detach the oblique and its case words before the merge, then hang
        # the former oblique under the merged verb as a plain object.
        dep_graph.remove_dependency(dep_verb_node, dep_obj_node)
        for node in dep_case_nodes:
            dep_graph.remove_dependency(dep_obj_node, node)

        dep_graph.replace_nodes(new_verb_phrase, new_verb_node)
        dep_graph.add_dependency(new_verb_node, dep_obj_node, "obj")
def adv_verb_modifier(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Attach ``advmod`` dependents to their head in the OIA graph.

    The searched pattern is ``head --advmod--> adv`` with head UPOS in
    VERB/NOUN/PROPN/AUX/PRON and adv UPOS in ADV/X/NOUN/ADJ/VERB. Matches
    already processed according to ``context`` or already related in
    ``oia_graph`` are skipped. For the others:

    * if adv has exactly one "obl" child and that child has no case word,
      adv becomes a predicate with the head as ``mod=True`` argument 1 and
      the oblique as argument 2;
    * otherwise, if adv has a "mark" child, the first one becomes the
      predicate with the head as ``mod=True`` argument 1 and adv as
      argument 2 (e.g. "verb1 in order to verb2");
    * otherwise adv is added as a plain modifier of the head.

    The original notes say that adverbs placed before the verb are meant to
    be handled by the verb-phrase converter; this function itself does not
    check word order.

    :param dep_graph: dependency graph that is searched
    :param oia_graph: OIA graph that receives the new words and relations
    :param context: conversion context consulted through ``is_processed``
    :return: None
    """
    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    # AUX is accepted for "be" words.
    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|PROPN|AUX|PRON")
    adv_node = DependencyGraphNode(UPOS="ADV|X|NOUN|ADJ|VERB")
    pattern.add_nodes([verb_node, adv_node])
    pattern.add_dependency(verb_node, adv_node, r'advmod')

    def _is_obl(n, l):
        return l.startswith("obl")

    def _is_mark(n, l):
        return l.startswith("mark")

    def _is_case(n, l):
        return "case" in l

    for match in dep_graph.match(pattern):
        head = match[verb_node]
        adverb = match[adv_node]

        if context.is_processed(head, adverb):
            continue
        if oia_graph.has_relation(head, adverb):
            continue

        obliques = [x for x, _ in dep_graph.children(adverb, filter=_is_obl)]

        sole_oblique = None
        oblique_has_case = False
        if len(obliques) == 1:
            (sole_oblique,) = obliques
            # if obl with case, let the oblique converter process it
            oblique_has_case = bool(
                [n for n, _ in dep_graph.children(sole_oblique,
                                                  filter=_is_case)])

        marks = [x for x, _ in dep_graph.children(adverb, filter=_is_mark)]

        oia_head = oia_graph.add_words(head.position)
        oia_adverb = oia_graph.add_words(adverb.position)

        if sole_oblique and not oblique_has_case:
            oia_oblique = oia_graph.add_words(sole_oblique.position)
            oia_graph.add_argument(oia_adverb, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_adverb, oia_oblique, 2)
        elif marks:
            oia_marker = oia_graph.add_words(marks[0].position)
            oia_graph.add_argument(oia_marker, oia_head, 1, mod=True)
            oia_graph.add_argument(oia_marker, oia_adverb, 2)
        else:
            oia_graph.add_mod(oia_adverb, oia_head)
def adv_ccomp(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Turn an adverb-like word governing a clausal complement into a predicate.

    The searched pattern is ``adv --ccomp|xcomp--> comp`` with adv UPOS in
    ADV/X/NOUN/PART. Matches already related in ``oia_graph`` are skipped.
    Case words of comp located between adv and comp are joined with adv
    into one predicate; when that happens a new ADV node flagged
    ``aux = True`` replaces them in ``dep_graph``.

    If adv itself is the "advmod" dependent of an ADV/X/NOUN head, that
    head is attached to the predicate as ``mod=True`` argument 1; otherwise
    comp is attached as argument 2.

    Matches are collected first and the graphs are modified afterwards.

    :param dep_graph: dependency graph; modified when case words are merged
    :param oia_graph: OIA graph that receives the new words and relations
    :param context: conversion context (not used by this converter)
    :return: None
    :raises Exception: when adv has more than one qualifying advmod head
    """
    pattern = DependencyGraph()

    # TODO: it seems that in UD labeling, adv is used instead of adj for noun
    adv_node = pattern.create_node(UPOS="ADV|X|NOUN|PART")  # PART is for "not"
    ccomp_node = pattern.create_node()
    pattern.add_dependency(adv_node, ccomp_node, r"ccomp|xcomp")

    pending = []
    for match in dep_graph.match(pattern):
        adverb = match[adv_node]
        complement = match[ccomp_node]

        if oia_graph.has_relation(adverb, complement):
            continue

        def _is_inner_case(n, l):
            return "case" == l and adverb.LOC < n.LOC < complement.LOC

        def _is_adv_head(n, l):
            return "advmod" == l and n.UPOS in {"ADV", "X", "NOUN"}

        inner_cases = [
            n for n, _ in dep_graph.children(complement, filter=_is_inner_case)
        ]

        if inner_cases:
            inner_cases = continuous_component(inner_cases, inner_cases[0])
            predicate_parts = [adverb] + inner_cases
            predicate_parts.sort(key=lambda n: n.LOC)
        else:
            predicate_parts = [adverb]

        heads = [n for n, _ in dep_graph.parents(adverb, filter=_is_adv_head)]
        if len(heads) > 1:
            raise Exception("Multiple subject")
        head = heads[0] if heads else None

        pending.append((head, predicate_parts, complement))

    for head, predicate_parts, complement in pending:
        if len(predicate_parts) > 1:
            # Several words form the predicate: replace them by one node.
            predicate = dep_graph.create_node(
                ID=" ".join(part.ID for part in predicate_parts),
                FORM=" ".join(part.FORM for part in predicate_parts),
                LEMMA=" ".join(part.LEMMA for part in predicate_parts),
                UPOS="ADV",
                LOC=predicate_parts[0].LOC)
            predicate.aux = True

            dep_graph.replace_nodes(predicate_parts, predicate)
            dep_graph.remove_dependency(complement, predicate)
        else:
            predicate = predicate_parts[0]

        oia_predicate = oia_graph.add_words(predicate.position)

        if head:
            oia_head = oia_graph.add_words(head.position)
            oia_graph.add_argument(oia_predicate, oia_head, 1, mod=True)
        else:
            oia_complement = oia_graph.add_words(complement.position)
            oia_graph.add_argument(oia_predicate, oia_complement, 2)