def such_that(dep_graph: DependencyGraph):
    """Rewrite "such X ... that <clause>" constructions in *dep_graph*.

    Matches the pattern "such <NOUN> ... that <VERB clause>" (e.g. "such a
    high price that ..."), merges the "such" and "that" tokens into a single
    SCONJ node, and re-attaches the clause to the noun via an
    ``advcl:<merged form>`` relation.

    :param dep_graph: the dependency graph, modified in place
    :return: None
    """
    pattern = DependencyGraph()

    noun_node = DependencyGraphNode(UPOS="NOUN")
    such_node = DependencyGraphNode(FORM="such")
    clause_pred_node = DependencyGraphNode(UPOS="VERB")
    that_node = DependencyGraphNode(FORM="that")

    pattern.add_nodes([noun_node, such_node, clause_pred_node, that_node])
    pattern.add_dependency(noun_node, such_node, r'det:predet')
    pattern.add_dependency(such_node, clause_pred_node, r'advcl:that')
    pattern.add_dependency(clause_pred_node, that_node, r'mark')

    # Collect all matches first, then mutate: changing the graph while
    # iterating dep_graph.match(...) could invalidate the match iterator.
    matches = []
    for match in dep_graph.match(pattern):
        dep_noun_node = match[noun_node]
        dep_such_node = match[such_node]
        dep_clause_pred_node = match[clause_pred_node]
        dep_that_node = match[that_node]

        # Require the surface order "such ... NOUN ... that ... VERB".
        if dep_such_node.LOC < dep_noun_node.LOC < dep_that_node.LOC < dep_clause_pred_node.LOC:
            matches.append((dep_noun_node, dep_such_node,
                            dep_clause_pred_node, dep_that_node))

    for dep_noun_node, dep_such_node, dep_clause_pred_node, dep_that_node in matches:
        # Merge "such" + "that" into one subordinating-conjunction node.
        # (Renamed from the original's `such_that_pred`, which shadowed the
        # list of matches being iterated.)
        such_that_node = merge_dep_nodes([dep_such_node, dep_that_node],
                                         UPOS="SCONJ",
                                         LOC=dep_that_node.LOC)
        dep_graph.add_node(such_that_node)

        dep_graph.add_dependency(dep_noun_node, dep_clause_pred_node,
                                 "advcl:" + such_that_node.FORM)
        dep_graph.add_dependency(dep_clause_pred_node, such_that_node, "mark")

        dep_graph.remove_node(dep_such_node)
        dep_graph.remove_node(dep_that_node)
def process_conjunction(dep_graph: DependencyGraph, root: DependencyGraphNode):
    """Flatten the conjunction rooted at *root* into one conjunction node.

    Recursively collapses nested "conj" children first, then builds a single
    merged conjunction node covering all parallel components and rewires the
    parents of *root*: to the merged node when the relation is a core
    argument or all components share one case/mark marker, otherwise one
    edge per component labelled with its own marker.

    :param dep_graph: the dependency graph, rewritten in place
    :param root: head node carrying the outgoing "conj" relations
    :return: tuple ``(conj_node, parallel_components)``
    """
    # Direct children attached to root via a "conj*" relation.
    conj_childs = [
        child for child, rels in dep_graph.children(
            root, filter=lambda n, l: l.startswith("conj"))
    ]

    assert conj_childs

    parallel_components = [root]
    for child in conj_childs:
        # A child with its own "conj" children is a nested conjunction;
        # recurse so it collapses into a single node before we continue.
        is_nest = any(
            grand_rels.startswith("conj")
            for grand_sun, grand_rels in dep_graph.children(child))
        if is_nest:
            logger.debug("nested conj is found ")
            logger.debug(str(child))

            conj_node, parallel_nodes = process_conjunction(dep_graph, child)

            logger.debug("conj_node is created ")
            logger.debug(str(conj_node))

            # Re-route any root -> member "conj" edges onto the new nested
            # conjunction node instead of its individual members.
            for node in parallel_nodes:
                logger.debug("Containing nodes ")
                logger.debug(str(node))
                rels = list(dep_graph.get_dependency(root, node))
                for rel in rels:
                    if rel.startswith("conj"):
                        logger.debug("remove dependency {0}".format(
                            (root.ID, node.ID, rel)))
                        dep_graph.remove_dependency(root, node, rel)
                        dep_graph.add_dependency(root, conj_node, rel)

            child = conj_node

        parallel_components.append(child)

    parallel_components.sort(key=lambda x: x.LOC)

    # if all(n.UPOS in NOUN_UPOS for n in parallel_components):
    #
    #     logger.debug("Processing all noun conjunction")
    #
    #     is_pure_noun = True
    #
    #     merging_noun_nodes = []
    #     min_loc = 10000
    #     max_loc = -1
    #     for child in parallel_components:
    #         if isinstance(child, DependencyGraphNode):
    #             min_loc = min(min_loc, child.LOC)
    #             max_loc = max(min_loc, child.LOC)
    #         elif isinstance(child, DependencyGraphSuperNode):
    #             min_loc = min(min_loc, min([x.LOC for x in child.nodes]))
    #             max_loc = max(max_loc, max([x.LOC for x in child.nodes]))
    #         merging_noun_nodes.extend(dep_graph.offsprings(child))
    #
    #         logger.debug("Checking acl for {0}".format(child))
    #         for n, l in dep_graph.children(child):
    #             logger.debug(n)
    #             logger.debug("label {0}".format(l))
    #             if "acl" in l:
    #                 is_pure_noun = False
    #                 break
    #
    #     if is_pure_noun:
    #         merging_noun_nodes = [n for n in merging_noun_nodes if min_loc <= n.LOC <= max_loc]
    #         is_pure_noun = not any(n.UPOS in {"ADP", "VERB", "SCONJ", "AUX"} for n in merging_noun_nodes)
    #
    #     if is_pure_noun:
    #
    #         merged_noun_nodes.sort(key=lambda x: x.LOC)
    #         for node in merging_noun_nodes:
    #             logger.debug("merging {0}".format(node))
    #
    #         new_noun = merge_dep_nodes(merging_noun_nodes, UPOS=root.UPOS, LOC=root.LOC)
    #         dep_graph.replace_nodes(merging_noun_nodes, new_noun)
    #
    #         return new_noun, []

    root_parents = list(set(parent for parent, rels in dep_graph.parents(root)))
    root_parents.sort(key=lambda x: x.LOC)
    # ic(list(map(str, root_parents)))

    conj_node, with_arg_palceholder = build_conjunction_node(
        dep_graph, root, root_parents, parallel_components)

    relation_to_conj = get_relation_to_conj(dep_graph, root, root_parents,
                                            parallel_components)

    # Remember each component's case/mark/cc children; used below to
    # re-attach a shared marker to the merged conjunction node.
    case_marks = dict()
    for index, node in enumerate(parallel_components):
        case_marks[node.ID] = [(n, l) for n, l in dep_graph.children(node)
                               if ("case" in l or "mark" in l or "cc" in l)]

    for key, values in case_marks.items():
        for v in values:
            logger.debug("case_marker = {} {} {}".format(
                key, v[0].ID, v[1].rels))

    logger.debug("relation_to_conj = {}".format(relation_to_conj))

    for parent in root_parents:
        # ic(parent)
        prefix, shared_prefix, required_mark = relation_to_conj[parent.ID]

        if any(x in prefix for x in {"subj", "obj", "ccomp", "xcomp"}) \
                or not required_mark or len(set(required_mark)) == 1:
            # Core-argument relation, or every component shares one marker:
            # attach the parent to the merged conjunction node only.
            for node in parallel_components:
                dep_graph.remove_dependency(parent, node)

            relation = prefix
            if required_mark and len(set(required_mark)) == 1:
                # All components use the same mark; fold it into the relation
                # label and hang the mark node off the conjunction node.
                mark_lemma = list(set(required_mark))[0]
                relation += ":" + mark_lemma
                mark_node = find_mark(case_marks, parallel_components,
                                      mark_lemma)
                if mark_node:
                    mark_node, mark_rel = mark_node
                    dep_graph.remove_node(mark_node)
                    dep_graph.add_node(mark_node)  # clear the dependency
                    dep_graph.add_dependency(conj_node, mark_node, mark_rel)
                else:
                    logger.error("cannot find the mark node")

            dep_graph.add_dependency(parent, conj_node, relation)
        else:
            # Components carry different markers: keep one parent edge per
            # component, each labelled with its own mark.
            complete_missing_case_mark(dep_graph, root, root_parents,
                                       parallel_components, relation_to_conj,
                                       case_marks)
            if not required_mark:
                required_mark = [None] * len(parallel_components)
            for index, (node, mark) in enumerate(
                    zip(parallel_components, required_mark)):
                if mark:
                    rel = prefix + ":" + mark
                else:
                    rel = prefix
                # if rel.startswith("conj"):
                #     continue
                logger.debug("add dependency {0}".format(
                    (parent.ID, node.ID, rel)))
                dep_graph.add_dependency(parent, node, rel)

    # Attach every component to the conjunction node as a numbered argument,
    # dropping the original root -> component "conj" edges.
    for idx, node in enumerate(parallel_components):
        if node != root:
            rels = dep_graph.get_dependency(root, node)
            for rel in rels:
                if rel.startswith("conj"):
                    dep_graph.remove_dependency(root, node)

        if with_arg_palceholder:
            index = idx + 1
        else:
            # a, but b, b should be the arg1 and a be the arg2
            index = len(parallel_components) - idx

        dep_graph.add_dependency(conj_node, node,
                                 "arg_conj:{0}".format(index))

    return conj_node, parallel_components
def secondary_predicate(dep_graph: DependencyGraph):
    """Detect xcomp used as a secondary predicate and insert an implicit
    "(be)" node to turn it into an explicit predicate.

    Matches a head whose non-VERB xcomp child has an nsubj that is also the
    head's obj (e.g. "I found the performance ominous"), then rebuilds the
    subtree around a new "(be)" predicate node.

    :param dep_graph: the dependency graph, rewritten in place
    :return: None
    """
    query = DependencyGraph()
    head = query.create_node()
    sec_pred = query.create_node(UPOS=r'(?!VERB\b)\b\w+')
    sec_subj = query.create_node()
    query.add_dependency(head, sec_pred, "xcomp")
    query.add_dependency(sec_pred, sec_subj, "nsubj")
    query.add_dependency(head, sec_subj, "obj")

    # Materialize matches before mutating the graph.
    for match in list(dep_graph.match(query)):
        d_head = match[head]
        d_pred = match[sec_pred]
        d_subj = match[sec_subj]

        # the position of d_subj and d_pred may be reversed in questions
        # I can't tell you how ominous I found Bush's performance in that interview.
        if d_head.LOC < d_subj.LOC < d_pred.LOC:
            for target in (d_pred, d_subj):
                dep_graph.remove_dependency(d_head, target)
            dep_graph.remove_dependency(d_pred, d_subj)

            if d_pred.UPOS in ("ADJ", "ADV"):
                # Merge "(be)" with the adjective/adverb into one predicate
                # node and move the old node's children onto it.
                be_node = merge_dep_nodes(["(be)", d_pred],
                                          UPOS="VERB",
                                          LOC=d_pred.LOC)
                dep_graph.add_node(be_node)
                dep_graph.add_dependency(d_head, be_node, "obj")
                dep_graph.add_dependency(be_node, d_subj, "nsubj")
                for child, lbl in list(dep_graph.children(d_pred)):
                    dep_graph.remove_dependency(d_pred, child)
                    dep_graph.add_dependency(be_node, child, lbl)
                dep_graph.remove_node(d_pred)
            else:
                # Keep the predicate node; add an auxiliary "(be)" just
                # before it.
                be_node = dep_graph.create_node(FORM="(be)",
                                                LEMMA="(be)",
                                                UPOS="VERB",
                                                LOC=d_pred.LOC - 0.5)
                be_node.aux = True
                dep_graph.add_dependency(d_head, be_node, "obj")
                dep_graph.add_dependency(be_node, d_subj, "nsubj")
                dep_graph.add_dependency(be_node, d_pred, "obj")

        elif d_pred.LOC < d_head.LOC:
            # in question, for example : how ominous
            # I can't tell you how ominous I found Bush's performance in that interview.
            for target in (d_pred, d_subj):
                dep_graph.remove_dependency(d_head, target)
            dep_graph.remove_dependency(d_pred, d_subj)

            be_node = dep_graph.create_node(FORM="(be)",
                                            LEMMA="(be)",
                                            UPOS="VERB",
                                            LOC=d_pred.LOC - 0.5)
            be_node.aux = True
            dep_graph.add_dependency(d_head, be_node, "obj")
            dep_graph.add_dependency(be_node, d_subj, "nsubj")

            attach_rel = "amod" if d_pred.UPOS in ("ADJ", "ADV") else "obj"
            dep_graph.add_dependency(be_node, d_pred, attach_rel)
def build_conjunction_node(dep_graph: DependencyGraph, root, root_parents, parallel_components):
    """Merge the connective words between coordinated components into a
    single conjunction node.

    For each adjacent pair of components, collect the tokens sitting between
    them that act as connectives (cc/case/mark tokens that look like
    conjunctions, punctuation, and bare "so"/"also" advmods); pairs with no
    explicit connective get the implicit "AND".  With more than two
    components the phrase is interleaved with "{k}" argument placeholders.

    :param dep_graph: graph being rewritten in place
    :param root: head of the conjunction (supplies UPOS/FEATS/LOC)
    :param root_parents: parents of root (unused here; kept for signature parity)
    :param parallel_components: coordinated nodes; sorted in place by LOC
    :return: tuple ``(conj_node, with_arg_placeholder)``
    """
    parallel_components.sort(key=lambda x: x.LOC)

    conj_phrases = []
    for left, right in pairwise(parallel_components):
        connectives = []
        ordered_children = sorted(dep_graph.children(right),
                                  key=lambda pair: pair[0].LOC)
        for token, label in ordered_children:
            # Only tokens strictly between the two components count.
            if not left.LOC < token.LOC < right.LOC:
                continue
            is_marker = "case" in label or "mark" in label or "cc" in label
            looks_like_conj = (any(w in token.LEMMA for w in
                                   {"and", "or", "but", "not", "as well as"})
                               or token.UPOS == "CCONJ")
            if is_marker and looks_like_conj:
                connectives.append(token)
            if "punct" in label:
                connectives.append(token)
            # A childless "so"/"also" adverb also acts as a connective.
            if "advmod" in label and any(w in token.LEMMA for w in {"so", "also"}):
                if not list(dep_graph.children(token)):
                    connectives.append(token)
        conj_phrases.append(connectives if connectives else ["AND"])

    if len(conj_phrases) == 1:
        # NOTE: deliberately the same list object as conj_phrases[0], so a
        # preconj inserted below is visible to the cleanup loop as well.
        merged_phrase = conj_phrases[0]
        with_arg_placeholder = False
    else:
        with_arg_placeholder = True
        merged_phrase = ["{1}"]
        for pos, phrase in enumerate(conj_phrases):
            merged_phrase.extend(phrase)
            merged_phrase.append("{{{0}}}".format(pos + 2))

    # A preconjunction on the first component ("both", "either", ...) is
    # pulled to the front of the phrase.
    for token, label in sorted(dep_graph.children(parallel_components[0]),
                               key=lambda pair: pair[0].LOC,
                               reverse=True):
        if label == "cc:preconj":
            merged_phrase.insert(0, token)
            dep_graph.remove_node(token)

    conj_node = merge_dep_nodes(
        merged_phrase,
        is_conj=True,
        UPOS=root.UPOS,
        FEATS=root.FEATS,
        LOC=root.LOC,
    )

    # Drop the raw connective tokens now folded into the merged node.
    for phrase in conj_phrases:
        for token in phrase:
            if isinstance(token, DependencyGraphNode):
                dep_graph.remove_node(token)

    dep_graph.add_node(conj_node)
    return conj_node, with_arg_placeholder