def oblique_without_prep(dep_graph: DependencyGraph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Translate bare oblique edges (``obl``, ``obl:tmod``, ``obl:npmod``) into
    OIA relations.

    Each matched head/oblique pair that ``oia_graph`` does not already relate
    (checked with ``direct_link=False``) is handled in one of two ways:

    * if ``"tmod"`` is among the values of the dependency edge, an auxiliary
      ``TIME_IN`` predicate is added with the head as argument 1
      (``mod=True``) and the oblique as argument 2;
    * otherwise the oblique is attached to the head through ``add_mod``.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context; not used by this rule
    :return: None
    """
    pattern = DependencyGraph()
    head_pattern = DependencyGraphNode(UPOS="VERB|NOUN|ADJ|PROPN|PRON")
    oblique_pattern = DependencyGraphNode()
    for pattern_node in (head_pattern, oblique_pattern):
        pattern.add_node(pattern_node)
    pattern.add_dependency(head_pattern, oblique_pattern, r'obl:tmod|obl:npmod|obl')

    for match in dep_graph.match(pattern):
        head = match[head_pattern]
        oblique = match[oblique_pattern]

        # Nothing to do when the two words are already related in the OIA graph.
        if oia_graph.has_relation(head, oblique, direct_link=False):
            continue

        edge_values = dep_graph.get_dependency(head, oblique).values()

        if "tmod" not in edge_values:
            # "npmod" and every other oblique flavour: plain modification.
            oia_head = oia_graph.add_words(head.position)
            oia_oblique = oia_graph.add_words(oblique.position)
            oia_graph.add_mod(oia_oblique, oia_head)
            continue

        # Temporal oblique: introduce the auxiliary TIME_IN predicate.
        time_pred = oia_graph.add_aux("TIME_IN")
        oia_head = oia_graph.add_words(head.position)
        oia_oblique = oia_graph.add_words(oblique.position)
        oia_graph.add_argument(time_pred, oia_head, 1, mod=True)
        oia_graph.add_argument(time_pred, oia_oblique, 2)
def such_that(dep_graph: DependencyGraph):
    """
    Fuse the "such ... that" construction into a single SCONJ marker.

    Example: "such a high price that ...".

    The pattern is a NOUN with a ``det:predet`` child "such", where "such"
    governs a VERB through ``advcl:that`` and that VERB has a ``mark`` child
    "that". A match is kept only when the surface order is
    such < noun < that < verb.

    For every kept match, "such" and "that" are merged into one node located
    at "that"; the clause predicate is attached to the noun with the relation
    ``advcl:<FORM of the merged node>``, the merged node becomes the ``mark``
    of the clause predicate, and the two original nodes are removed.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()
    noun_node = DependencyGraphNode(UPOS="NOUN")
    such_node = DependencyGraphNode(FORM="such")
    clause_pred_node = DependencyGraphNode(UPOS="VERB")
    that_node = DependencyGraphNode(FORM="that")
    pattern.add_nodes([noun_node, such_node, clause_pred_node, that_node])

    pattern.add_dependency(noun_node, such_node, r'det:predet')
    pattern.add_dependency(such_node, clause_pred_node, r'advcl:that')
    pattern.add_dependency(clause_pred_node, that_node, r'mark')

    # Phase 1: collect the matches first, the graph is edited only afterwards.
    candidates = []
    for match in dep_graph.match(pattern):
        noun = match[noun_node]
        such = match[such_node]
        clause_pred = match[clause_pred_node]
        that = match[that_node]

        if such.LOC < noun.LOC < that.LOC < clause_pred.LOC:
            candidates.append((noun, such, clause_pred, that))

    # Phase 2: rewrite the graph for each collected construction.
    for noun, such, clause_pred, that in candidates:
        merged_marker = merge_dep_nodes([such, that], UPOS="SCONJ", LOC=that.LOC)

        dep_graph.add_node(merged_marker)
        dep_graph.add_dependency(noun, clause_pred, "advcl:" + merged_marker.FORM)
        dep_graph.add_dependency(clause_pred, merged_marker, "mark")

        dep_graph.remove_node(such)
        dep_graph.remove_node(that)
def be_not_phrase(dep_graph: DependencyGraph):
    """
    Merge a "be" node with the "not" that sits between it and its object.

    The pattern is ``be -[*obj*]-> object -[*advmod*]-> not``. A match is
    kept when

    * ``"be"`` is one of the space-separated tokens of the head's LEMMA,
    * ``"not"`` is one of the space-separated tokens of the modifier's LEMMA,
    * "not" is neither located after the object nor before "be".

    For every kept match the ``advmod`` edge from the object to "not" is
    removed, and "be" and "not" are replaced by one merged node that takes
    the UPOS and LOC of the "be" node.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    be_node = pattern.create_node()  # node that contains the "be" verb
    obj_node = pattern.create_node()
    not_node = pattern.create_node()

    for pattern_node in (be_node, obj_node, not_node):
        pattern.add_node(pattern_node)

    pattern.add_dependency(be_node, obj_node, r'\w*obj\w*')
    pattern.add_dependency(obj_node, not_node, r'\w*advmod\w*')

    # Collect first; the graph is modified only after matching is finished.
    accepted = []
    for match in dep_graph.match(pattern):
        be_word = match[be_node]
        obj_word = match[obj_node]
        not_word = match[not_node]

        lemmas_fit = ("be" in be_word.LEMMA.split(" ")
                      and "not" in not_word.LEMMA.split(" "))
        if not lemmas_fit:
            continue

        # Required surface order: be ... not ... object.
        if not_word.LOC > obj_word.LOC or be_word.LOC > not_word.LOC:
            continue

        accepted.append((be_word, obj_word, not_word))

    for be_word, obj_word, not_word in accepted:
        dep_graph.remove_dependency(obj_word, not_word, 'advmod')
        merged_verb = merge_dep_nodes((be_word, not_word),
                                      UPOS=be_word.UPOS,
                                      LOC=be_word.LOC)
        dep_graph.replace_nodes([be_word, not_word], merged_verb)
def subject_relative_clause(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Handle subject-extracted / referred relative clauses (``acl:relcl``).

    Examples: "the person who is tall", "the person that is killed" (with a
    ``ref`` word) and "the person waiting for the baby" (without one).

    A match is skipped when the relative clause has a subject and the first
    subject found is not the modified entity. Otherwise:

    * with a ``ref`` child of the entity located between the entity and the
      clause (the right-most one is used):

      - if the ref word has a ``case`` child (right-most one is used), the
        case word becomes the predicate with the clause as argument 1 and
        the ref word as argument 2 (``mod=True``);
      - otherwise the ref word becomes argument 1 (``mod=True``) of the
        clause predicate;

      in both cases the entity is linked to the ref word via ``add_ref``;
    * without such a ``ref`` child, the entity itself becomes argument 1
      (``mod=True``) of the clause predicate.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context; not used by this rule
    :return: None
    """
    pattern = DependencyGraph()
    entity_node = DependencyGraphNode()
    relcl_node = DependencyGraphNode()
    pattern.add_node(entity_node)
    pattern.add_node(relcl_node)
    pattern.add_dependency(entity_node, relcl_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):
        entity = match[entity_node]
        relcl = match[relcl_node]

        subjects = [
            n for n, l in dep_graph.children(
                relcl, filter=lambda n, l: "subj" in l)
        ]
        # The clause has its own, different subject: not subject-extracted.
        if subjects and subjects[0].ID != entity.ID:
            continue

        oia_verb = oia_graph.add_words(relcl.position)
        oia_entity = oia_graph.add_words(entity.position)

        def _ref_between(n, l):
            return l == "ref" and entity.LOC < n.LOC < relcl.LOC

        refs = sorted(
            (n for n, l in dep_graph.children(entity, filter=_ref_between)),
            key=lambda x: x.LOC)

        if not refs:
            # function and pred, seems we need another label
            oia_graph.add_argument(oia_verb, oia_entity, 1, mod=True)
            continue

        ref = refs[-1]
        oia_ref = oia_graph.add_words(ref.position)

        cases = sorted(
            (n for n, l in dep_graph.children(
                ref, filter=lambda n, l: "case" in l)),
            key=lambda x: x.LOC)

        if cases:
            # "with which ...": the preposition becomes the root predicate
            oia_case = oia_graph.add_words(cases[-1].position)
            oia_graph.add_argument(oia_case, oia_verb, 1)
            oia_graph.add_argument(oia_case, oia_ref, 2, mod=True)
        else:
            oia_graph.add_argument(oia_verb, oia_ref, 1, mod=True)

        oia_graph.add_ref(oia_entity, oia_ref)
def subject_relative_clause_loop(dep_graph, oia_graph, context: UD2OIAContext):
    """
    Loop variant of the subject relative clause rule.

    This version exists because the match algorithm does not match part of a
    loop (see test_match for more detail). Examples: "the person who is
    tall", "the person that is killed" (with ref), "the person waiting for
    the baby" (without ref).

    Two patterns are processed one after the other:

    1. a two-node cycle ``entity -[acl:relcl]-> clause`` and
       ``clause -[subj]-> entity``. If the entity has a ``ref`` child located
       between entity and clause (right-most one is used), either its
       right-most ``case`` child becomes the predicate (clause = argument 1,
       ref = argument 2) or the ref becomes argument 1 of the clause; the
       entity is linked to the ref with ``add_ref``. Without a ref, the
       entity is argument 1 (``mod=True``) of the clause.
    2. ``entity -[acl:relcl]-> verb -[subj]-> what|who|which|that``. Both
       edges are reported to ``context.processed``; the verb modifies the
       entity, the entity refers to the subject word, and the subject word
       is argument 1 of the verb.

    NOTE(review): unlike ``subject_relative_clause``, the ref arguments in
    pattern 1 are added without ``mod=True`` - confirm this is intended.

    :param dep_graph: dependency graph that is searched for the patterns
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context; edges handled by pattern 2 are
        reported through ``context.processed``
    :return: None
    """
    # ---- pattern 1: entity <-> clause cycle ----
    pattern = DependencyGraph()
    entity_node = DependencyGraphNode()
    relcl_node = DependencyGraphNode()
    pattern.add_node(entity_node)
    pattern.add_node(relcl_node)
    pattern.add_dependency(relcl_node, entity_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, relcl_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):
        entity = match[entity_node]
        relcl = match[relcl_node]

        oia_verb = oia_graph.add_words(relcl.position)
        oia_entity = oia_graph.add_words(entity.position)

        def _ref_between(n, l):
            return l == "ref" and entity.LOC < n.LOC < relcl.LOC

        refs = sorted(
            (n for n, l in dep_graph.children(entity, filter=_ref_between)),
            key=lambda x: x.LOC)

        if not refs:
            # function and pred, seems we need another label
            oia_graph.add_argument(oia_verb, oia_entity, 1, mod=True)
            continue

        ref = refs[-1]
        oia_ref = oia_graph.add_words(ref.position)

        cases = sorted(
            (n for n, l in dep_graph.children(
                ref, filter=lambda n, l: "case" in l)),
            key=lambda x: x.LOC)

        if cases:
            # "with which ...": the preposition becomes the root predicate
            oia_case = oia_graph.add_words(cases[-1].position)
            oia_graph.add_argument(oia_case, oia_verb, 1)
            oia_graph.add_argument(oia_case, oia_ref, 2)
        else:
            oia_graph.add_argument(oia_verb, oia_ref, 1)

        oia_graph.add_ref(oia_entity, oia_ref)

    # ---- pattern 2: relative pronoun as subject of the clause ----
    pattern = DependencyGraph()
    verb_node = DependencyGraphNode()
    entity_node = DependencyGraphNode()
    subj_node = DependencyGraphNode(LEMMA=r"what|who|which|that")
    pattern.add_nodes([verb_node, entity_node, subj_node])
    pattern.add_dependency(verb_node, subj_node, r'\w*subj\w*')
    pattern.add_dependency(entity_node, verb_node, r'\w*acl:relcl\w*')

    for match in dep_graph.match(pattern):
        entity = match[entity_node]
        verb = match[verb_node]
        subj = match[subj_node]

        context.processed(verb, subj)
        context.processed(entity, verb)

        oia_verb = oia_graph.add_words(verb.position)
        oia_entity = oia_graph.add_words(entity.position)
        oia_subj = oia_graph.add_words(subj.position)

        oia_graph.add_mod(oia_verb, oia_entity)
        oia_graph.add_ref(oia_entity, oia_subj)
        oia_graph.add_argument(oia_verb, oia_subj, 1)
def be_adj_verb_phrase(dep_graph):
    """
    Merge a copula "be" with the ADJ/ADV it introduces into one VERB node.

    The pattern is ``ADJ|ADV -[cop]-> be``. A match is skipped when

    * ``"be"`` is not one of the space-separated tokens of the copula LEMMA,
    * the copula is located after the adjective (may be a question), or
    * the adjective is a ``DependencyGraphSuperNode`` flagged ``is_conj``.

    For every kept pair: if the adjective has a parent through a label
    containing ``arg_con``, each ADJ/ADV sibling under the first such parent
    that has no ``cop`` child of its own gets ``"(be) "`` prefixed to its
    FORM and LEMMA. Then the copula and the adjective are replaced by a
    merged node with UPOS ``VERB`` located at the copula.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    adj_node = pattern.create_node(UPOS="ADJ|ADV")
    be_node = pattern.create_node()  # node that contains the "be" verb

    pattern.add_node(adj_node)
    pattern.add_node(be_node)
    pattern.add_dependency(adj_node, be_node, r'cop')

    pairs = []
    for match in dep_graph.match(pattern):
        adj_word = match[adj_node]
        be_word = match[be_node]

        if ("be" not in be_word.LEMMA.split(" ")
                or be_word.LOC > adj_word.LOC  # may be question
                or (isinstance(adj_word, DependencyGraphSuperNode)
                    and adj_word.is_conj)):
            continue

        pairs.append((be_word, adj_word))

    for be_word, adj_word in pairs:
        conj_parents = [
            n for n, l in dep_graph.parents(adj_word) if "arg_con" in l
        ]

        if conj_parents:
            siblings = [
                n for n, l in dep_graph.children(conj_parents[0])
                if "arg_con" in l and n.UPOS in {"ADJ", "ADV"}
            ]
            for sibling in siblings:
                if sibling == adj_word:
                    continue
                own_copulas = [
                    n for n, l in dep_graph.children(sibling) if "cop" in l
                ]
                if not own_copulas:
                    # The sibling shares this copula implicitly.
                    sibling.FORM = "(be) " + sibling.FORM
                    sibling.LEMMA = "(be) " + sibling.LEMMA

        merged_verb = merge_dep_nodes([be_word, adj_word],
                                      UPOS="VERB",
                                      LOC=be_word.LOC)
        dep_graph.replace_nodes([be_word, adj_word], merged_verb)
def secondary_predicate(dep_graph: DependencyGraph):
    """
    Detect an ``xcomp`` used as a secondary predicate and insert an implicit
    "(be)" so that it forms a predicate of its own.

    The pattern is a predicate with a non-VERB ``xcomp`` child and an ``obj``
    child, where the ``obj`` is also the ``nsubj`` of the ``xcomp``. Two
    word orders are handled; any other order leaves the match untouched:

    * predicate < subject < xcomp: the three pattern edges are removed. If
      the xcomp is ADJ/ADV it is merged with the literal "(be)" into a new
      VERB node that inherits the xcomp's children and replaces it;
      otherwise a new "(be)" VERB node is created half a position before
      the xcomp, which becomes its ``obj``.
    * xcomp < predicate (questions, e.g. "I can't tell you how ominous I
      found Bush's performance in that interview."): the three pattern
      edges are removed and a new "(be)" VERB node is created half a
      position before the xcomp, which becomes its ``amod`` (ADJ/ADV) or
      ``obj`` (anything else).

    In every handled case the "(be)" node becomes the ``obj`` of the
    predicate and takes the former object as ``nsubj``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    pred_node = pattern.create_node()
    xcomp_node = pattern.create_node(UPOS=r'(?!VERB\b)\b\w+')
    xcomp_subj_node = pattern.create_node()

    pattern.add_dependency(pred_node, xcomp_node, "xcomp")
    pattern.add_dependency(xcomp_node, xcomp_subj_node, "nsubj")
    pattern.add_dependency(pred_node, xcomp_subj_node, "obj")

    # Matches are materialised up front because the graph is edited below.
    for match in list(dep_graph.match(pattern)):
        pred = match[pred_node]
        xcomp = match[xcomp_node]
        subj = match[xcomp_subj_node]

        # The subject and the xcomp may appear in reversed order in questions.
        if pred.LOC < subj.LOC < xcomp.LOC:
            declarative = True
        elif xcomp.LOC < pred.LOC:
            declarative = False
        else:
            continue

        dep_graph.remove_dependency(pred, xcomp)
        dep_graph.remove_dependency(pred, subj)
        dep_graph.remove_dependency(xcomp, subj)

        is_adj_or_adv = xcomp.UPOS == "ADJ" or xcomp.UPOS == "ADV"

        if declarative and is_adj_or_adv:
            # "(be)" and the adjective/adverb fuse into one predicate node.
            be_word = merge_dep_nodes(["(be)", xcomp],
                                      UPOS="VERB",
                                      LOC=xcomp.LOC)
            dep_graph.add_node(be_word)
            dep_graph.add_dependency(pred, be_word, "obj")
            dep_graph.add_dependency(be_word, subj, "nsubj")

            for child, label in list(dep_graph.children(xcomp)):
                dep_graph.remove_dependency(xcomp, child)
                dep_graph.add_dependency(be_word, child, label)

            dep_graph.remove_node(xcomp)
            continue

        # Stand-alone implicit "(be)" placed just before the xcomp.
        be_word = dep_graph.create_node(FORM="(be)", LEMMA="(be)", UPOS="VERB",
                                        LOC=xcomp.LOC - 0.5)
        be_word.aux = True

        dep_graph.add_dependency(pred, be_word, "obj")
        dep_graph.add_dependency(be_word, subj, "nsubj")
        # Only the question order can reach here with an ADJ/ADV xcomp.
        if is_adj_or_adv:
            dep_graph.add_dependency(be_word, xcomp, "amod")
        else:
            dep_graph.add_dependency(be_word, xcomp, "obj")
def multi_words_case(dep_graph: DependencyGraph):
    """
    Merge the words of a multi-word case marker into a single node.

    The pattern is ``head -[<rel>:<case>]-> x -[case]-> case word``. For each
    match whose case word is still in the graph, all offsprings of the
    direct ``case`` children of ``x`` are collected. Nothing happens when
    exactly one node is collected. Otherwise, for every relation between
    head and ``x`` that contains ``":"`` and whose part after the colon
    occurs in the "_"-joined lemmas of the collected nodes, the nodes are
    merged into one node (UPOS and LOC of the matched case word) and the
    head-to-``x`` edge is replaced by
    ``<rel>:<space-joined lemmas of the collected nodes>``.

    :param dep_graph: dependency graph, modified in place
    :return: None
    :raises ValueError: if a relation with more than one ``":"`` is met
        (the relation is unpacked into exactly two parts)
    """
    pattern = DependencyGraph()

    noun_node = DependencyGraphNode()
    x_node = DependencyGraphNode()
    case_node = DependencyGraphNode()

    for pattern_node in (noun_node, x_node, case_node):
        pattern.add_node(pattern_node)

    pattern.add_dependency(noun_node, x_node, r'\w*:\w*')
    pattern.add_dependency(x_node, case_node, r'\bcase\b')

    # Matches are materialised up front because the graph is edited below.
    for match in list(dep_graph.match(pattern)):
        head = match[noun_node]
        x_word = match[x_node]
        case_word = match[case_node]

        # The case word may have been merged away by an earlier match.
        if not dep_graph.has_node(case_word):
            continue

        direct_cases = [
            n for n, l in dep_graph.children(
                x_word, filter=lambda n, l: "case" == l)
        ]

        collected = set()
        for direct_case in direct_cases:
            collected.update(dep_graph.offsprings(direct_case))

        if len(collected) == 1:
            continue

        case_words = sorted(collected, key=lambda n: n.LOC)

        logger.debug("multi case discovered")
        for word in case_words:
            logger.debug(str(word))

        joined_lemmas = "_".join([w.LEMMA for w in case_words])

        # Relation prefixes whose case part is covered by the collected words.
        # Sometimes the relation only carries one word of the marker.
        prefixes = []
        for rel in dep_graph.get_dependency(head, x_word):
            if ":" not in rel:
                continue
            rel_str, case_str = rel.split(":")
            if case_str in joined_lemmas:
                prefixes.append(rel_str)

        for rel_str in prefixes:
            logger.debug("we are merging:")
            for word in case_words:
                logger.debug(str(word))

            # Skip when a previous iteration already merged these words.
            if not all(dep_graph.has_node(w) for w in case_words):
                continue

            merged_case = merge_dep_nodes(case_words,
                                          UPOS=case_word.UPOS,
                                          LOC=case_word.LOC)
            dep_graph.replace_nodes(case_words, merged_case)
            dep_graph.remove_dependency(head, x_word)
            dep_graph.add_dependency(
                head, x_word,
                rel_str + ":" + " ".join([w.LEMMA for w in case_words]))
def process_conjunction(dep_graph: DependencyGraph, root: DependencyGraphNode):
    """
    Rewrite the coordination headed by ``root`` around one conjunction node.

    Steps, in order:

    1. collect the children of ``root`` whose relation starts with ``conj``;
       a child that itself has a ``conj`` child is processed recursively and
       replaced by the conjunction node the recursion returns;
    2. build the conjunction node with ``build_conjunction_node``;
    3. for every parent of ``root``: either point a single relation at the
       conjunction node (subject/object/complement relations, no required
       marks, or one shared mark), or re-attach each conjunct to the parent
       with its own mark;
    4. drop the ``conj`` edges below ``root`` and attach every conjunct to
       the conjunction node as ``arg_conj:<index>``.

    :param dep_graph: dependency graph, modified in place
    :param root: head of the coordination; must have a ``conj`` child
    :return: tuple ``(conj_node, parallel_components)``; the second item is
        the list of conjuncts sorted by LOC
    :raises AssertionError: if ``root`` has no ``conj`` child
    """
    conj_children = [
        child for child, _ in dep_graph.children(
            root, filter=lambda n, l: l.startswith("conj"))
    ]
    assert conj_children

    # ---- step 1: gather the conjuncts, resolving nested coordinations ----
    parallel_components = [root]
    for child in conj_children:
        has_nested_conj = any(
            label.startswith("conj")
            for _, label in dep_graph.children(child))

        if has_nested_conj:
            logger.debug("nested conj is found ")
            logger.debug(str(child))

            nested_conj, nested_components = process_conjunction(dep_graph, child)

            logger.debug("conj_node is created ")
            logger.debug(str(nested_conj))

            for node in nested_components:
                logger.debug("Containing nodes ")
                logger.debug(str(node))

                # Move conj edges from the inner conjunct to the new node.
                for rel in list(dep_graph.get_dependency(root, node)):
                    if not rel.startswith("conj"):
                        continue
                    logger.debug("remove dependency {0}".format(
                        (root.ID, node.ID, rel)))
                    dep_graph.remove_dependency(root, node, rel)
                    dep_graph.add_dependency(root, nested_conj, rel)

            child = nested_conj

        parallel_components.append(child)

    parallel_components.sort(key=lambda x: x.LOC)

    # ---- step 2: build the conjunction node ----
    root_parents = sorted(
        set(parent for parent, _ in dep_graph.parents(root)),
        key=lambda x: x.LOC)

    conj_node, with_arg_palceholder = build_conjunction_node(
        dep_graph, root, root_parents, parallel_components)

    relation_to_conj = get_relation_to_conj(dep_graph, root, root_parents,
                                            parallel_components)

    # case / mark / cc children of every conjunct, keyed by conjunct ID
    case_marks = {
        node.ID: [(n, l) for n, l in dep_graph.children(node)
                  if ("case" in l or "mark" in l or "cc" in l)]
        for node in parallel_components
    }

    for key, values in case_marks.items():
        for marker, label in values:
            logger.debug("case_marker = {} {} {}".format(
                key, marker.ID, label.rels))

    logger.debug("relation_to_conj = {}".format(relation_to_conj))

    # ---- step 3: re-attach the parents of the coordination ----
    for parent in root_parents:
        prefix, shared_prefix, required_mark = relation_to_conj[parent.ID]

        attach_to_conj_node = (
            any(x in prefix for x in ("subj", "obj", "ccomp", "xcomp"))
            or not required_mark
            or len(set(required_mark)) == 1)

        if attach_to_conj_node:
            for node in parallel_components:
                dep_graph.remove_dependency(parent, node)

            relation = prefix

            if required_mark and len(set(required_mark)) == 1:
                # every conjunct uses the same mark
                (mark_lemma,) = set(required_mark)
                relation += ":" + mark_lemma

                found = find_mark(case_marks, parallel_components, mark_lemma)
                if not found:
                    logger.error("cannot find the mark node")
                else:
                    mark_node, mark_rel = found
                    # remove + add clears the existing dependencies of the mark
                    dep_graph.remove_node(mark_node)
                    dep_graph.add_node(mark_node)
                    dep_graph.add_dependency(conj_node, mark_node, mark_rel)

            dep_graph.add_dependency(parent, conj_node, relation)
            continue

        complete_missing_case_mark(dep_graph, root, root_parents,
                                   parallel_components, relation_to_conj,
                                   case_marks)

        # NOTE(review): this branch is only entered with a truthy
        # required_mark, so the fallback below looks unreachable unless the
        # helper above empties the list in place - confirm.
        if not required_mark:
            required_mark = [None] * len(parallel_components)

        for node, mark in zip(parallel_components, required_mark):
            rel = prefix + ":" + mark if mark else prefix
            logger.debug("add dependency {0}".format(
                (parent.ID, node.ID, rel)))
            dep_graph.add_dependency(parent, node, rel)

    # ---- step 4: hang the conjuncts below the conjunction node ----
    component_count = len(parallel_components)
    for idx, node in enumerate(parallel_components):
        if node != root:
            # NOTE(review): the edge is removed while its relations are being
            # iterated - confirm get_dependency returns a safe view/copy.
            for rel in dep_graph.get_dependency(root, node):
                if rel.startswith("conj"):
                    dep_graph.remove_dependency(root, node)

        # Without placeholders ("a, but b") b should be arg1 and a arg2,
        # hence the reversed numbering.
        index = idx + 1 if with_arg_palceholder else component_count - idx

        dep_graph.add_dependency(conj_node, node, "arg_conj:{0}".format(index))

    return conj_node, parallel_components
def build_conjunction_node(dep_graph: DependencyGraph, root, root_parents, parallel_components):
    """
    Create the conjunction node that joins ``parallel_components``.

    For each pair of neighbouring conjuncts, the children of the right
    conjunct located strictly between the two are scanned and collected as
    connector words when they are

    * ``case`` / ``mark`` / ``cc`` children that are CCONJ or whose LEMMA
      contains one of "and", "or", "but", "not", "as well as",
    * ``punct`` children, or
    * childless ``advmod`` children whose LEMMA contains "so" or "also".

    A pair without connectors gets the literal ``"AND"``. With a single pair
    the connector list itself is the phrase; with several pairs the phrases
    are interleaved with the placeholders ``{1}``, ``{2}``, ... Any
    ``cc:preconj`` child of the first conjunct is put in front and removed
    from the graph. The phrase is merged into one node (``is_conj=True``,
    UPOS/FEATS/LOC of ``root``), the connector nodes are removed from the
    graph and the new node is added.

    NOTE(review): the lemma tests are substring tests, so a lemma that
    merely contains e.g. "or" also qualifies - confirm this is intended.

    :param dep_graph: dependency graph, modified in place
    :param root: head of the coordination; supplies UPOS, FEATS and LOC
    :param root_parents: not used by this function
    :param parallel_components: list of conjuncts; sorted in place by LOC
    :return: tuple ``(conj_node, with_arg_palceholder)``; the flag is True
        when numbered placeholders were inserted
    """
    parallel_components.sort(key=lambda x: x.LOC)

    def _child_loc(pair):
        return pair[0].LOC

    conj_phrases = []
    for left, right in pairwise(parallel_components):
        connectors = []

        for n, l in sorted(dep_graph.children(right), key=_child_loc):
            # Only words strictly between the two conjuncts can connect them.
            if not left.LOC < n.LOC < right.LOC:
                continue

            if ("case" in l or "mark" in l or "cc" in l) and (
                    any(x in n.LEMMA
                        for x in ("and", "or", "but", "not", "as well as"))
                    or n.UPOS == "CCONJ"):
                connectors.append(n)

            if "punct" in l:
                connectors.append(n)

            if "advmod" in l and any(x in n.LEMMA for x in ("so", "also")):
                if not list(dep_graph.children(n)):
                    connectors.append(n)

        conj_phrases.append(connectors or ["AND"])

    with_arg_palceholder = len(conj_phrases) != 1
    if with_arg_palceholder:
        unified_conj_phrase = ["{1}"]
        for index, phrase in enumerate(conj_phrases):
            unified_conj_phrase.extend(phrase)
            unified_conj_phrase.append("{{{0}}}".format(index + 2))
    else:
        # NOTE(review): this aliases conj_phrases[0]; a cc:preconj node
        # inserted below is therefore also visited by the final removal loop
        # and remove_node is called on it twice - confirm that is tolerated.
        unified_conj_phrase = conj_phrases[0]

    first_children = sorted(dep_graph.children(parallel_components[0]),
                            key=_child_loc, reverse=True)
    for n, l in first_children:
        if l == "cc:preconj":
            unified_conj_phrase.insert(0, n)
            dep_graph.remove_node(n)

    conj_node = merge_dep_nodes(
        unified_conj_phrase,
        is_conj=True,
        UPOS=root.UPOS,
        FEATS=root.FEATS,
        LOC=root.LOC,
    )

    # The connector words now live inside the merged node.
    for phrase in conj_phrases:
        for item in phrase:
            if isinstance(item, DependencyGraphNode):
                dep_graph.remove_node(item)

    dep_graph.add_node(conj_node)

    return conj_node, with_arg_palceholder
def oblique_with_prep(dep_graph, oia_graph: OIAGraph, context: UD2OIAContext):
    """
    Translate oblique edges introduced by a preposition-like word into OIA
    predicates.

    The pattern is ``head -[obl...]-> oblique -[case|mark]-> prep``. For each
    match whose head and oblique are not yet related in ``oia_graph``, the
    ``case`` / ``mark`` children of the oblique are reduced with
    ``continuous_component`` to the ones connected to the matched
    preposition. The predicate word is the last connected word whose
    lower-cased LEMMA is among the values of the head-to-oblique edge, or the
    last connected word when there is none. The head becomes argument 1
    (``mod=True``) and the oblique argument 2 of that predicate.

    :param dep_graph: dependency graph that is searched for the pattern
    :param oia_graph: OIA graph that receives the new nodes and edges
    :param context: conversion context; not used by this rule
    :return: None
    """
    pattern = DependencyGraph()

    # adj is for "has more on", adv is for "south of XXXX"
    verb_node = DependencyGraphNode(UPOS="VERB|ADJ|ADV|NOUN|X|PROPN|PRON")
    # verb is for including/according, adj is for "prior to"
    prep_node = DependencyGraphNode(UPOS=r"PRON|ADP|VERB|SCONJ|ADJ")
    oblique_node = DependencyGraphNode()

    pattern.add_node(verb_node)
    pattern.add_node(prep_node)
    pattern.add_node(oblique_node)

    pattern.add_dependency(verb_node, oblique_node, r'\bobl')
    pattern.add_dependency(oblique_node, prep_node, r"case|mark")

    for match in dep_graph.match(pattern):
        dep_prep_node = match[prep_node]
        dep_verb_node = match[verb_node]
        dep_oblique_node = match[oblique_node]

        if oia_graph.has_relation(dep_verb_node, dep_oblique_node):
            continue

        oblique_edge = dep_graph.get_dependency(dep_verb_node, dep_oblique_node)
        oblique_cases = oblique_edge.values()

        prop_nodes = [
            x for x, l in dep_graph.children(
                dep_oblique_node,
                filter=lambda n, l: l == "case" or l == "mark")
        ]

        connected_case_nodes = continuous_component(prop_nodes, dep_prep_node)

        # Prefer the (last) case word that the relation itself names;
        # fall back to the last word of the connected component.
        head_node = None
        for node in connected_case_nodes:
            if node.LEMMA.lower() in oblique_cases:
                head_node = node

        if not head_node:
            head_node = connected_case_nodes[-1]

        pred_node = oia_graph.add_words(head_node.position)
        arg1_node = oia_graph.add_words(dep_verb_node.position)
        arg2_node = oia_graph.add_words(dep_oblique_node.position)

        oia_graph.add_argument(pred_node, arg1_node, 1, mod=True)
        oia_graph.add_argument(pred_node, arg2_node, 2)