def acl_mod_verb(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Link a verb to the word it modifies through an ``acl`` dependency.

    This pass is called after adnominal_clause_mark, which means the
    remaining ``acl`` edges carry no mark.

    :param dep_graph: dependency graph searched for ``acl`` edges
    :param oia_graph: OIA graph that receives the modifier link
    :param context: conversion context; pairs it reports as processed are skipped
    :return: None
    """
    # ADJ is for the cases where "many/some" abbreviate "many X/some X" (acting as NOUN);
    # ADV is for "here" in "i am here thinking xxx".
    head_upos = "NOUN|PRON|PROPN|ADJ|ADV|NUM"
    # AUX is for "can"/"have" when the true verb is omitted.
    clause_upos = "VERB|AUX"

    pattern = DependencyGraph()
    noun_node = pattern.create_node(UPOS=head_upos)
    verb_node = pattern.create_node(UPOS=clause_upos)
    pattern.add_nodes([noun_node, verb_node])
    pattern.add_dependency(noun_node, verb_node, r'acl')

    for match in dep_graph.match(pattern):
        head, clause = match[noun_node], match[verb_node]

        already_handled = (
            context.is_processed(head, clause)
            or oia_graph.has_relation(head, clause, direct_link=False)
        )
        if already_handled:
            continue

        oia_verb_node = oia_graph.add_words(clause.position)
        oia_noun_node = oia_graph.add_words(head.position)

        edge = dep_graph.get_dependency(head, clause)
        subtyped = [label for label in edge.rels if label.startswith("acl:")]

        # The subtype is still extracted (and the one-label invariant still
        # asserted), but nothing below consumes it: the variant that turned the
        # subtype into an auxiliary predicate node is disabled.
        pred = None
        if subtyped:
            assert len(subtyped) == 1
            pred = subtyped[0].split(":")[1]
            if pred == "relcl":
                pred = None

        oia_graph.add_mod(oia_verb_node, oia_noun_node)
def amod_obl(dep_graph: DependencyGraph):
    """
    Fold an adjective and the adposition following it into one ADP node.

    Covers phrases such as "more than" and "successful by". For a match of
    noun -amod-> adj -obl:x-> obl -case-> adp whose surface order is
    noun < adj < adp < obl, the adj and adp nodes are merged and the noun is
    linked to the oblique with ``nmod:<FORM of the merged node>``.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    pattern = DependencyGraph()
    noun_node = DependencyGraphNode(UPOS=r"NOUN|PRON")
    adj_node = DependencyGraphNode(UPOS="ADJ")
    adp_node = DependencyGraphNode(UPOS="ADP")
    obl_node = DependencyGraphNode()
    pattern.add_nodes([noun_node, adj_node, adp_node, obl_node])
    pattern.add_dependency(noun_node, adj_node, r'amod')
    pattern.add_dependency(adj_node, obl_node, r'obl:\w+')
    pattern.add_dependency(obl_node, adp_node, r'case')

    def _is_obl(n, l):
        return "obl" in l

    candidates = []
    for match in dep_graph.match(pattern):
        noun = match[noun_node]
        adj = match[adj_node]
        obl = match[obl_node]
        adp = match[adp_node]

        # an adjective governing more than one oblique is left untouched
        if len(list(dep_graph.children(adj, filter=_is_obl))) > 1:
            continue

        # the adposition form must appear among the values of the adj -> obl dependency
        if adp.FORM not in dep_graph.get_dependency(adj, obl).values():
            continue

        if noun.LOC < adj.LOC < adp.LOC < obl.LOC:
            candidates.append((noun, adj, obl, adp))

    # the graph is only edited once matching has finished
    for noun, adj, obl, adp in candidates:
        merged = merge_dep_nodes([adj, adp], UPOS="ADP", LOC=adp.LOC)

        dep_graph.remove_dependency(noun, adj)
        dep_graph.remove_dependency(adj, obl)
        dep_graph.replace_nodes([adj, adp], merged)
        dep_graph.add_dependency(noun, obl, "nmod:" + merged.FORM)
def and_or(dep_graph: DependencyGraph):
    """
    Merge an "and ... or" coordinator pair into a single node.

    Matches parent -conj:x-> some, where ``some`` has both an "and" and an
    "or" as ``cc`` children and "and" governs "or" through ``conj``. The two
    coordinators and everything located between them are merged, and the
    parent -> some dependency is set to ``conj:<FORM of the merged node>``.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    content_upos = {"VERB", "NOUN", "ADJ", "ADP", "ADV"}

    pattern = DependencyGraph()
    parent_node = pattern.create_node()
    some_node = pattern.create_node()
    and_node = pattern.create_node(LEMMA=r"\band\b")
    or_node = pattern.create_node(LEMMA=r"\bor\b")
    pattern.add_dependency(parent_node, some_node, r'\bconj:\w*')
    pattern.add_dependency(some_node, and_node, r'\bcc\b')
    pattern.add_dependency(some_node, or_node, r'\bcc\b')
    pattern.add_dependency(and_node, or_node, r'\bconj')

    # matches are materialised first because the graph is edited inside the loop
    for match in list(dep_graph.match(pattern)):
        parent = match[parent_node]
        conjunct = match[some_node]
        and_word = match[and_node]
        or_word = match[or_node]

        rel = dep_graph.get_dependency(parent, conjunct)
        if not (rel.startswith("conj:and") or rel.startswith("conj:or")):
            continue

        between = [n for n in dep_graph.nodes() if and_word.LOC < n.LOC < or_word.LOC]

        # a content word between the two coordinators means they are not one unit
        if any(n.UPOS in content_upos for n in between):
            continue

        phrase = between + [and_word, or_word]
        phrase.sort(key=lambda n: n.LOC)

        # skip when some member cannot be looked up in the graph any more
        if not all([dep_graph.get_node(x.ID) for x in phrase]):
            continue

        merged = merge_dep_nodes(phrase,
                                 UPOS=and_word.UPOS,
                                 LOC=and_word.LOC,
                                 FEATS=and_word.FEATS)

        dep_graph.replace_nodes(phrase, merged)
        dep_graph.set_dependency(parent, conjunct, "conj:" + merged.FORM)
def multi_word_sconj(dep_graph: DependencyGraph):
    """
    Merge a multi-word subordinating conjunction into one mark node.

    Matches verb -advcl:x-> verb2 -mark-> SCONJ where the SCONJ lemma is among
    the values of the advcl dependency. The nodes returned by ``offsprings``
    for the mark are merged (unless there is exactly one of them), and the
    advcl label is rebuilt from the lemma of the merged node.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    pattern = DependencyGraph()
    verb_node = pattern.create_node(UPOS="VERB")
    verb2_node = pattern.create_node(UPOS="VERB")
    mark_node = pattern.create_node(UPOS="SCONJ")
    pattern.add_dependency(verb_node, verb2_node, r'advcl:\w*')
    pattern.add_dependency(verb2_node, mark_node, r'mark')

    pending = []
    for match in dep_graph.match(pattern):
        head = match[verb_node]
        clause = match[verb2_node]
        mark = match[mark_node]

        if mark.LEMMA not in dep_graph.get_dependency(head, clause).values():
            continue

        phrase = list(dep_graph.offsprings(mark))
        if len(phrase) == 1:
            # nothing to merge
            continue

        phrase.sort(key=lambda n: n.LOC)
        pending.append((head, clause, mark, phrase))

    # edits are applied after matching so the match iterator is not disturbed
    for head, clause, mark, phrase in pending:

        # skip when some member cannot be looked up in the graph any more
        if not all([dep_graph.get_node(x.ID) for x in phrase]):
            continue

        dep_graph.remove_dependency(clause, mark)
        dep_graph.remove_dependency(head, clause)

        merged = merge_dep_nodes(phrase, UPOS=mark.UPOS, LOC=mark.LOC)

        dep_graph.replace_nodes(phrase, merged)
        dep_graph.add_dependency(head, clause, "advcl:" + merged.LEMMA)
        dep_graph.add_dependency(clause, merged, "mark")
def acl_loop(dep_graph: DependencyGraph):
    """
    Break two-node cycles created by relative clauses.

    For every dependency n1 -> n2 labelled ``acl:relcl``, the reverse
    dependency n2 -> n1 is removed when it carries one of obl, nsubj, obj,
    mark or advmod.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    back_labels = ("obl", "nsubj", "obj", "mark", "advmod")

    # NOTE(review): edges are removed while dependencies() is being iterated;
    # this is only safe if dependencies() returns a snapshot -- confirm.
    for n1, n2, deps in dep_graph.dependencies():
        if "acl:relcl" not in deps:
            continue

        back_deps = dep_graph.get_dependency(n2, n1)
        if not any(label in back_deps for label in back_labels):
            continue

        dep_graph.remove_dependency(n2, n1)
def nmod_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert a nominal modifier that has a case marker into an OIA predicate.

    Examples: "the office of the chair", "Istanbul in Turkey".

    For every parent -nmod-> child -case-> case_word match, the case word
    becomes a predicate whose argument 1 (added with ``mod=True``) is the
    parent and whose argument 2 is the child. Matches for which
    ``oia_graph.has_relation(parent, child)`` is already true are skipped.

    :param dep_graph: dependency graph searched for nmod/case pairs
    :param oia_graph: OIA graph that receives the predicate and its arguments
    :param context: conversion context (not used by this pass)
    :return: None
    """
    pattern = DependencyGraph()

    parent_node = DependencyGraphNode()
    child_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    pattern.add_nodes([parent_node, child_node, case_node])

    pattern.add_dependency(parent_node, child_node, r'\w*nmod\w*')
    pattern.add_dependency(child_node, case_node, r'\w*case\w*')

    for match in dep_graph.match(pattern):

        dep_parent_node = match[parent_node]
        dep_child_node = match[child_node]
        dep_case_node = match[case_node]

        if oia_graph.has_relation(dep_parent_node, dep_child_node):
            continue

        # The nmod subtype may differ from the case word (vs -> lemma "versus",
        # according -> lemma "accord"; the label keeps the shorter one). The
        # predicate is the case word either way, so the label is not consulted:
        # the former if/else on it had two identical branches.
        pred_node = oia_graph.add_words(dep_case_node.position)
        arg1_node = oia_graph.add_words(dep_parent_node.position)
        arg2_node = oia_graph.add_words(dep_child_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)
def get_relation_to_conj(dep_graph: DependencyGraph, root, root_parents, parallel_components):
    """
    Work out how each parent of a conjunction relates to its components.

    For every parent, the first relation found between the parent and each
    parallel component is split into a prefix (e.g. ``obl``) and a mark
    (e.g. ``in``). Subtypes that are not marks (relcl, xsubj, pass, poss,
    tmod) stay part of the prefix. Components other than ``root`` whose
    relation to the parent starts with ``conj`` are ignored.

    Labels with more than one colon (e.g. ``nsubj:pass:xsubj``) are split at
    the first colon only; they used to raise ``ValueError`` on unpacking.
    Results for labels with at most one colon are unchanged.

    :param dep_graph: graph queried through ``get_dependency(parent, child)``
    :param root: the component that heads the conjunction
    :param root_parents: parents of ``root``; each must expose ``ID``
    :param parallel_components: the components of the conjunction
    :return: dict mapping ``parent.ID`` to ``(prefix, shared_prefix, marks)``;
        ``shared_prefix`` is False when some component has no relation to the
        parent; ``marks`` is None when every collected mark is None or when
        the prefix contains subj/obj/ccomp/xcomp, otherwise the list of marks
    :raises AssertionError: when the components do not share a single prefix
    """
    non_mark_subtypes = {"relcl", "xsubj", "pass", "poss", "tmod"}
    core_relations = ("subj", "obj", "ccomp", "xcomp")

    relation_to_conj = dict()

    for parent in root_parents:

        prefixs = []
        marks = []
        shared_prefix = True

        for child in parallel_components:
            rels = dep_graph.get_dependency(parent, child)

            if not rels:
                shared_prefix = False
                marks.append(None)
                continue

            rel = list(rels.rels)[0]
            if child != root and rel.startswith("conj"):
                continue

            # split once, so that multi-colon labels no longer break unpacking
            prefix, colon, suffix = rel.partition(":")
            if not colon:
                mark = None
            elif all(part in non_mark_subtypes for part in suffix.split(":")):
                # pure subtype(s): the whole label is the prefix
                prefix = rel
                mark = None
            else:
                mark = suffix

            prefixs.append(prefix)
            marks.append(mark)

        assert (len(set(prefixs))) == 1
        prefix = prefixs[0]

        if all(m is None for m in marks):
            marks = None

        # core arguments never keep per-component marks
        if any(x in prefix for x in core_relations):
            marks = None

        relation_to_conj[parent.ID] = (prefix, shared_prefix, marks)

    return relation_to_conj
def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Convert ``obl`` / ``obl:tmod`` / ``obl:npmod`` dependencies into OIA links.

    Example: "cut X by a knife". An oblique whose dependency values contain
    "tmod" is attached through an auxiliary "TIME_IN" predicate (head as
    argument 1 with ``mod=True``, oblique as argument 2); any other oblique
    is added as a modifier through ``add_mod``.

    :param dep_graph: dependency graph searched for oblique edges
    :param oia_graph: OIA graph that receives the new links
    :param context: conversion context (not used by this pass)
    :return: None
    """
    pattern = DependencyGraph()
    verb_node = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    oblique_node = DependencyGraphNode()
    pattern.add_node(verb_node)
    pattern.add_node(oblique_node)
    pattern.add_dependency(verb_node, oblique_node, r'obl:tmod|obl:npmod|obl')

    for match in dep_graph.match(pattern):
        head = match[verb_node]
        oblique = match[oblique_node]

        if oia_graph.has_relation(head, oblique, direct_link=False):
            continue

        oblique_types = dep_graph.get_dependency(head, oblique).values()

        if "tmod" not in oblique_types:
            # "npmod" and every other oblique: plain modifier
            oia_head = oia_graph.add_words(head.position)
            oia_oblique = oia_graph.add_words(oblique.position)
            oia_graph.add_mod(oia_oblique, oia_head)
            continue

        # temporal oblique: routed through an auxiliary predicate
        time_pred = oia_graph.add_aux("TIME_IN")
        oia_head = oia_graph.add_words(head.position)
        oia_oblique = oia_graph.add_words(oblique.position)
        oia_graph.add_argument(time_pred, oia_head, 1, mod=True)
        oia_graph.add_argument(time_pred, oia_oblique, 2)
def nmod_without_case(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Add every remaining ``nmod`` dependency to the OIA graph as a modifier.

    A ``nmod:poss`` edge is skipped when the center node is among the
    offsprings of its own modifier (the "whose" case), and any pair the OIA
    graph already relates (``direct_link=False``) is skipped as well.

    :param dep_graph: dependency graph searched for nmod edges
    :param oia_graph: OIA graph that receives the modifier links
    :param context: conversion context (not used by this pass)
    :return: None
    """
    pattern = DependencyGraph()
    center_node = pattern.create_node()
    modifier_node = pattern.create_node()
    pattern.add_dependency(center_node, modifier_node, r'\w*nmod\w*')

    for match in dep_graph.match(pattern):
        center = match[center_node]
        modifier = match[modifier_node]

        rels = dep_graph.get_dependency(center, modifier)

        if "nmod:poss" in rels:
            # "whose": the possessor sits above the thing it modifies
            if center in set(dep_graph.offsprings(modifier)):
                continue

        if oia_graph.has_relation(center, modifier, direct_link=False):
            continue

        oia_center = oia_graph.add_words(center.position)
        oia_modifier = oia_graph.add_words(modifier.position)

        oia_graph.add_mod(oia_modifier, oia_center)
def two_node_with_case(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Handle a graph that consists of exactly two regular nodes.

    When, ignoring ROOT and PUNCT, the graph holds two nodes and the later
    one (by LOC) governs the earlier one with a dependency equal to "case",
    the earlier node becomes a predicate taking the later one as argument 2.

    :param dep_graph: dependency graph to inspect
    :param oia_graph: OIA graph that receives the predicate and its argument
    :param context: conversion context (not used by this pass)
    :return: None
    """
    skipped_upos = {"ROOT", "PUNCT"}
    regular_nodes = [n for n in dep_graph.nodes() if n.UPOS not in skipped_upos]

    if len(regular_nodes) != 2:
        return

    case_node, noun_node = sorted(regular_nodes, key=lambda x: x.LOC)

    if dep_graph.get_dependency(noun_node, case_node) == "case":
        oia_case_node = oia_graph.add_words(case_node.position)
        oia_noun_node = oia_graph.add_words(noun_node.position)

        oia_graph.add_argument(oia_case_node, oia_noun_node, 2)
def acl_verb_obl_case(dep_graph: DependencyGraph):
    """
    Merge an adnominal verb with the case word(s) of its only oblique.

    Example: "something extracted by". For subj -acl-> verb -obl:x-> obj
    -case-> case_word, the verb and the case children of obj located between
    the verb and obj are merged into one verb node, and obj becomes its
    ``obj``. A match is skipped when the verb has more than one obl child,
    has any child whose label contains "obj" or "comp", has more than one
    subject child, or when the case word's FORM is not among the values of
    the obl dependency.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    pattern = DependencyGraph()
    subj_node = pattern.create_node()
    verb_node = pattern.create_node(UPOS="VERB")
    obj_node = pattern.create_node()
    case_node = pattern.create_node()
    pattern.add_dependency(subj_node, verb_node, r'acl')
    pattern.add_dependency(verb_node, obj_node, r'obl:\w*')
    pattern.add_dependency(obj_node, case_node, r'case')

    def _is_obl(n, l):
        return l.startswith("obl")

    def _is_obj_or_comp(n, l):
        return "obj" in l or "comp" in l

    def _is_subj(n, l):
        return "subj" in l

    phrases = []
    for match in dep_graph.match(pattern):
        head = match[subj_node]
        verb = match[verb_node]
        oblique = match[obj_node]
        case_word = match[case_node]

        obl_children = [n for n, l in dep_graph.children(verb, filter=_is_obl)]
        if len(obl_children) > 1:
            continue

        objects = [n for n, l in dep_graph.children(verb, filter=_is_obj_or_comp)]
        if objects:
            continue

        obl_rel = dep_graph.get_dependency(verb, oblique)
        if case_word.FORM not in obl_rel.values():
            continue

        def _is_inner_case(n, l):
            return l.startswith("case") and verb.LOC < n.LOC < oblique.LOC

        # there may be several case words; all of them are joined to the verb
        inner_cases = [n for n, l in dep_graph.children(oblique, filter=_is_inner_case)]

        subjects = list(dep_graph.children(verb, filter=_is_subj))
        if len(subjects) > 1:
            continue

        phrases.append((head, verb, oblique, inner_cases))

    # the graph is only edited once matching has finished
    for head, verb, oblique, inner_cases in phrases:

        new_verb_phrase = [verb] + inner_cases

        logging.debug("acl_verb_obl_case: we are merging nodes")
        logging.debug("\n".join(str(node) for node in new_verb_phrase))

        merged_verb = merge_dep_nodes(new_verb_phrase,
                                      UPOS=verb.UPOS,
                                      LOC=verb.LOC,
                                      FEATS=verb.FEATS)

        logging.debug("acl_verb_obl_case: we obtain a new node")
        logging.debug(str(merged_verb))

        dep_graph.remove_dependency(verb, oblique)
        for case_child in inner_cases:
            dep_graph.remove_dependency(oblique, case_child)

        dep_graph.replace_nodes(new_verb_phrase, merged_verb)
        dep_graph.add_dependency(merged_verb, oblique, "obj")
def object_relative_clause(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    ##### Object-extracted/referred relative clause #####
    ##### the person that Andy knows #####

    Matches entity -acl:relcl-> verb -subj-> subject. Matches whose subject
    lemma is what/who/which/that are skipped, as are matches already marked
    processed in ``context`` or already related in ``oia_graph``. For the
    rest, the subject is added as argument 1 of the verb and the entity as
    argument 2 (with ``mod=True``); a ``ref`` child of the entity located
    between the entity and the verb is linked to the entity through
    ``add_ref`` and attached according to its relation to the verb; every
    ``ccomp`` child of the verb is added as argument 3.

    :param dep_graph: dependency graph searched for relative clauses
    :param oia_graph: OIA graph that receives the new nodes and links
    :param context: conversion context used to read and record processed pairs
    :return: None
    :raises Exception: when a ref node without case children has a relation
        to the verb that neither contains "obj" nor equals "advmod"
    """

    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode()

    pattern.add_nodes([verb_node, entity_node, subj_node])

    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):

        dep_entity_node = match[entity_node]
        dep_subj_node = match[subj_node]
        dep_verb_node = match[verb_node]

        # a relative pronoun in subject position is not handled by this pass
        if dep_subj_node.LEMMA in {"what", "who", "which", "that"}:
            continue

        logger.debug("we found a objective relative clause")
        logger.debug("entity: {0}".format(dep_entity_node))
        logger.debug("subject: {0}".format(dep_subj_node))
        logger.debug("verb: {0}".format(dep_verb_node))

        if context.is_processed(dep_entity_node, dep_verb_node):
            logger.debug("processed")
            continue

        # record both edges of the match as handled
        context.processed(dep_verb_node, dep_subj_node)
        context.processed(dep_entity_node, dep_verb_node)

        oia_entity_node = oia_graph.add_words(dep_entity_node.position)
        oia_verb_node = oia_graph.add_words(dep_verb_node.position)
        oia_subj_node = oia_graph.add_words(dep_subj_node.position)

        if oia_graph.has_relation(oia_entity_node, oia_verb_node):
            logger.debug("has relation between entity and verb")
            continue

        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)

        def __valid_ref(n, l):
            # a ref child only counts when it lies between the entity and the verb
            return l == "ref" and dep_entity_node.LOC < n.LOC < dep_verb_node.LOC

        ref_nodes = list(n for n, l in dep_graph.children(dep_entity_node, filter=__valid_ref))
        ref_nodes.sort(key=lambda x: x.LOC)

        if ref_nodes:
            # the ref closest to the verb is used
            ref_node = ref_nodes[-1]

            oia_ref_node = oia_graph.add_words(ref_node.position)

            oia_graph.add_ref(oia_entity_node, oia_ref_node)

            logger.debug("we are coping with ref between:")
            logger.debug(dep_verb_node)
            logger.debug(ref_node)

            ref_relation = dep_graph.get_dependency(dep_verb_node, ref_node)

            case_nodes = list(n for n, l in dep_graph.children(
                ref_node, filter=lambda n, l: "case" in l))
            case_nodes.sort(key=lambda x: x.LOC)

            if ref_relation:
                if case_nodes:
                    # with which xxxx, the with will become the root pred
                    case_node = case_nodes[-1]
                    oia_case_node = oia_graph.add_words(case_node.position)

                    oia_graph.add_argument(oia_case_node, oia_verb_node, 1, mod=True)
                    oia_graph.add_argument(oia_case_node, oia_ref_node, 2)
                    oia_graph.add_mod(oia_verb_node, oia_entity_node)
                else:

                    if "obj" in ref_relation:
                        oia_graph.add_argument(oia_verb_node, oia_ref_node, 2)
                    elif ref_relation == "advmod":
                        oia_graph.add_mod(oia_ref_node, oia_verb_node)
                    else:
                        raise Exception(
                            "unknown relation: {}".format(ref_relation))
                    # oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): the nesting level of the next two calls is inferred
        # (loop level, so that clauses without a ref node still link the entity)
        # -- confirm. The subject is added as argument 1 a second time here.
        oia_graph.add_argument(oia_verb_node, oia_subj_node, 1)
        oia_graph.add_argument(oia_verb_node, oia_entity_node, 2, mod=True)

        # NOTE(review): rels is not read; it only served the disabled check below
        rels = dep_graph.get_dependency(dep_entity_node, dep_verb_node)

        #if rels.endswith("obj"):
        for node, l in dep_graph.children(dep_verb_node):
            if l == "ccomp":
                oia_ccomp_node = oia_graph.add_words(node.position)
                oia_graph.add_argument(oia_verb_node, oia_ccomp_node, 3)
def verb_phrase(dep_graph: DependencyGraph):
    """
    Merge a verb with its auxiliaries, compounds and leading adverbs.

    Every VERB/AUX node is a candidate head, except an AUX that is itself an
    ``aux`` child of another node, and a past-tense form that is the ``amod``
    of a later node (that one is handled as part of a noun phrase). The head
    collects the offsprings of its ``advmod`` children (when they contain no
    VERB/NOUN/AUX/PRON) and its ``compound`` children, is reduced by
    ``continuous_component``, and then gains its ``aux`` children. Phrases of
    more than one node are merged into a single VERB node.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    verb_upos = {"VERB", "AUX"}
    skipped_lemmas = {"so", "also", "why"}
    heavy_upos = {"VERB", "NOUN", "AUX", "PRON"}

    verb_phrases = []

    for node in dep_graph.nodes(filter=lambda x: x.UPOS in verb_upos):

        if node.UPOS == "AUX":
            aux_heads = [n for n, l in dep_graph.parents(node, filter=lambda n, l: l == "aux")]
            if aux_heads:
                continue

        if "Tense" in node.FEATS and "Past" in node.FEATS["Tense"]:
            # if the verb is before the noun, it will be processed by
            # noun_phrase and taken as a part of the noun
            modified_nouns = [
                n for n, l in dep_graph.parents(
                    node, filter=lambda n, l: l == "amod" and node.LOC < n.LOC)
            ]
            if modified_nouns:
                continue

        root = node
        verbs = [root]

        for child, label in dep_graph.children(root):
            # a child that also points back at the head is ignored
            if dep_graph.get_dependency(child, root):
                continue

            if child.LEMMA in skipped_lemmas:
                continue

            if "advmod" in label:
                adverb_span = list(dep_graph.offsprings(child))
                if any(x.UPOS in heavy_upos for x in adverb_span):
                    continue
                verbs.extend(adverb_span)
            elif "compound" in label:
                verbs.append(child)

        # keep what precedes the head, plus compounds wherever they are
        verbs = [
            x for x in verbs
            if x.LOC <= root.LOC or "compound" in dep_graph.get_dependency(root, x)
        ]

        verbs = continuous_component(verbs, root)

        # add aux
        verbs.extend(n for n, l in dep_graph.children(root) if "aux" in l)

        verbs.sort(key=lambda x: x.LOC)
        # only a disabled look-ahead for a following "not" ever used this value
        last_loc = verbs[-1].LOC

        if len(verbs) > 1:
            verb_phrases.append((verbs, root))

    # the graph is only edited once all phrases have been collected
    for verbs, root in verb_phrases:
        merged_verb = merge_dep_nodes(verbs, UPOS="VERB", LOC=root.LOC, FEATS=root.FEATS)
        dep_graph.replace_nodes(verbs, merged_verb)
def multi_words_case(dep_graph: DependencyGraph):
    """
    Merge the words of a multi-word case marker into a single node.

    Matches noun -x:y-> x -case-> case_word. The offsprings of all direct
    ``case`` children of x are gathered; when there is not exactly one of
    them and the subtype of a noun -> x relation occurs in their lemmas
    joined by "_", they are merged into one node and the relation is
    re-added with the lemmas joined by a space as its subtype.

    :param dep_graph: dependency graph, rewritten in place
    :return: None
    """
    pattern = DependencyGraph()
    noun_node = DependencyGraphNode()
    x_node = DependencyGraphNode()
    case_node = DependencyGraphNode()
    pattern.add_node(noun_node)
    pattern.add_node(x_node)
    pattern.add_node(case_node)
    pattern.add_dependency(noun_node, x_node, r'\w*:\w*')
    pattern.add_dependency(x_node, case_node, r'\bcase\b')

    def _is_case(n, l):
        return "case" == l

    # matches are materialised first because the graph is edited inside the loop
    for match in list(dep_graph.match(pattern)):

        head = match[noun_node]
        governed = match[x_node]
        case_word = match[case_node]

        # an earlier merge may already have consumed this case word
        if not dep_graph.has_node(case_word):
            continue

        gathered = set()
        for direct_case, _ in dep_graph.children(governed, filter=_is_case):
            gathered.update(dep_graph.offsprings(direct_case))

        if len(gathered) == 1:
            continue

        case_words = sorted(gathered, key=lambda n: n.LOC)

        logger.debug("multi case discovered")
        for node in case_words:
            logger.debug(str(node))

        multiword_cases = []
        for rel in dep_graph.get_dependency(head, governed):
            if ":" not in rel:
                continue

            rel_str, case_str = rel.split(":")

            # some times, the rel only contains one word
            # Example :
            # that OBSF values within the extended trial balance may be misstated
            # due to data issues ( above and beyond existing conversations with AA
            # on model simplifications)
            if case_str in "_".join([x.LEMMA for x in case_words]):
                multiword_cases.append((head, governed, case_word, case_words, rel_str))

        for head, governed, case_word, case_nodes, rel_str in multiword_cases:

            logger.debug("we are merging:")
            for node in case_nodes:
                logger.debug(str(node))

            if not all([dep_graph.has_node(x) for x in case_nodes]):
                continue

            merged_case = merge_dep_nodes(case_nodes,
                                          UPOS=case_word.UPOS,
                                          LOC=case_word.LOC)

            dep_graph.replace_nodes(case_nodes, merged_case)
            dep_graph.remove_dependency(head, governed)
            dep_graph.add_dependency(head, governed,
                                     rel_str + ":" + " ".join([x.LEMMA for x in case_nodes]))
def process_conjunction(dep_graph: DependencyGraph, root: DependencyGraphNode):
    """
    Replace the ``conj`` edges below ``root`` with an explicit conjunction node.

    Nested conjunctions (a conj child that itself has conj children) are
    processed recursively first and represented by their own conjunction
    node. The parents of ``root`` are then re-linked either to the
    conjunction node or to every component separately, and each component is
    attached to the conjunction node with an ``arg_conj:<index>`` dependency.

    :param dep_graph: dependency graph, rewritten in place
    :param root: node that has at least one ``conj`` child
    :return: tuple ``(conj_node, parallel_components)``; the components are
        sorted by LOC
    :raises AssertionError: when ``root`` has no ``conj`` child
    """

    conj_childs = [
        child for child, rels in dep_graph.children(
            root, filter=lambda n, l: l.startswith("conj"))
    ]

    assert conj_childs

    parallel_components = [root]

    for child in conj_childs:

        is_nest = any(
            grand_rels.startswith("conj")
            for grand_sun, grand_rels in dep_graph.children(child))
        if is_nest:
            logger.debug("nested conj is found ")
            logger.debug(str(child))

            # build the inner conjunction first; it then stands in for the child
            conj_node, parallel_nodes = process_conjunction(dep_graph, child)
            logger.debug("conj_node is created ")
            logger.debug(str(conj_node))

            for node in parallel_nodes:
                logger.debug("Containing nodes ")
                logger.debug(str(node))
                rels = list(dep_graph.get_dependency(root, node))
                for rel in rels:
                    if rel.startswith("conj"):
                        logger.debug("remove dependency {0}".format(
                            (root.ID, node.ID, rel)))

                        # move the conj edge from the inner component to the inner conjunction node
                        dep_graph.remove_dependency(root, node, rel)
                        dep_graph.add_dependency(root, conj_node, rel)

            child = conj_node

        parallel_components.append(child)

    parallel_components.sort(key=lambda x: x.LOC)

    # NOTE: a disabled special case used to sit here; it merged a conjunction
    # made only of nouns (with no acl child and no ADP/VERB/SCONJ/AUX inside
    # its span) into a single noun node and returned early.

    root_parents = list(set(parent for parent, rels in dep_graph.parents(root)))
    root_parents.sort(key=lambda x: x.LOC)

    # ic(list(map(str, root_parents)))

    conj_node, with_arg_palceholder = build_conjunction_node(
        dep_graph, root, root_parents, parallel_components)

    relation_to_conj = get_relation_to_conj(dep_graph, root, root_parents,
                                            parallel_components)

    # case/mark/cc children of every component, keyed by the component ID
    case_marks = dict()
    for index, node in enumerate(parallel_components):
        case_marks[node.ID] = [(n, l) for n, l in dep_graph.children(node)
                               if ("case" in l or "mark" in l or "cc" in l)]

    for key, values in case_marks.items():
        for v in values:
            logger.debug("case_marker = {} {} {}".format(
                key, v[0].ID, v[1].rels))

    logger.debug("relation_to_conj = {}".format(relation_to_conj))

    for parent in root_parents:
        # ic(parent)

        prefix, shared_prefix, required_mark = relation_to_conj[parent.ID]
        # core arguments, no marks at all, or a single shared mark:
        # the parent points at the conjunction node as a whole
        if any(x in prefix for x in {"subj", "obj", "ccomp", "xcomp"}) \
                or not required_mark or len(set(required_mark)) == 1:

            for node in parallel_components:
                dep_graph.remove_dependency(parent, node)

            relation = prefix

            if required_mark and len(set(required_mark)) == 1:
                ## with same mark

                mark_lemma = list(set(required_mark))[0]
                relation += ":" + mark_lemma

                mark_node = find_mark(case_marks, parallel_components, mark_lemma)

                if mark_node:
                    mark_node, mark_rel = mark_node

                    dep_graph.remove_node(mark_node)
                    dep_graph.add_node(mark_node)  # clear the dependency
                    dep_graph.add_dependency(conj_node, mark_node, mark_rel)
                else:
                    logger.error("cannot find the mark node")

            # NOTE(review): nesting inferred -- placed at the level of the
            # outer branch so the parent is always re-linked; confirm.
            dep_graph.add_dependency(parent, conj_node, relation)

        else:
            # the components carry different marks: the parent keeps one
            # dependency per component
            complete_missing_case_mark(dep_graph, root, root_parents,
                                       parallel_components, relation_to_conj,
                                       case_marks)

            if not required_mark:
                required_mark = [None] * len(parallel_components)

            for index, (node, mark) in enumerate(
                    zip(parallel_components, required_mark)):
                if mark:
                    rel = prefix + ":" + mark
                else:
                    rel = prefix

                # if rel.startswith("conj"):
                #     continue
                logger.debug("add dependency {0}".format(
                    (parent.ID, node.ID, rel)))

                dep_graph.add_dependency(parent, node, rel)

    for idx, node in enumerate(parallel_components):
        if node != root:
            # drop the original conj edge between root and this component
            rels = dep_graph.get_dependency(root, node)
            for rel in rels:
                if rel.startswith("conj"):
                    dep_graph.remove_dependency(root, node)

        if with_arg_palceholder:
            index = idx + 1
        else:
            # a, but b, b should be the arg1 and a be the arg2
            index = len(parallel_components) - idx

        dep_graph.add_dependency(conj_node, node, "arg_conj:{0}".format(index))

    return conj_node, parallel_components