def backward(self, oia_graph: OIAGraph, **kwargs):
    """
    Collapse the ``pred.arg.*`` children of conjunction nodes into one span node.

    A node is handled only when ``is_conjunction_without_args`` accepts it,
    it has at least one ``pred.arg.*`` child, and none of those children has
    children of its own. The replacement node covers the range from the
    smallest to the largest integer word position found in the children.

    @param oia_graph: graph to rewrite in place
    @type oia_graph: OIAGraph
    @param kwargs: ignored
    @type kwargs: dict
    @return: True when at least one node was merged
    @rtype: bool
    """
    changed = False

    for conj in list(oia_graph.nodes()):
        if not is_conjunction_without_args(conj, oia_graph):
            continue

        arg_children = [
            (child, edge.label)
            for child, edge in oia_graph.children(conj)
            if edge.label.startswith("pred.arg.")
        ]
        if not arg_children:
            continue

        # An argument that has children of its own is left untouched.
        if any(list(oia_graph.children(child)) for child, _ in arg_children):
            continue

        # Only integer entries are word positions; other entries are ignored.
        positions = [
            word
            for child, _ in arg_children
            for word in child.words()
            if isinstance(word, int)
        ]
        first, last = min(positions), max(positions)

        merged = oia_graph.add_spans([(first, last)])
        changed = True

        for child, _ in arg_children:
            oia_graph.remove_node(child)
        oia_graph.replace(conj, merged)

        child_texts = "|".join(
            oia_graph.node_text(child) for child, _ in arg_children)
        logger.debug("Merging {0} to {1}".format(
            child_texts, oia_graph.node_text(merged)))

    return changed
def forward(self, oia_graph: OIAGraph, **kwargs):
    """
    Split noun phrases that contain "of" into a chain of nodes.

    A phrase ``A of B of C`` held in a single node is replaced by
    ``A -[as:pred.arg.1]-> of -[pred.arg.2]-> B -[as:pred.arg.1]-> of ...``.
    The parents and children of the original node are re-attached to the
    node built from the first part, and the original node is removed.

    According to the previous merge operation, if there is any modification
    to the part after the "of", the noun phrase will not have been merged, so
    phrases reaching this point are expected to have no modification on the
    second part.

    Nodes are left untouched when the phrase is the single word "of", when no
    "of" token is found inside the integer spans, or when any part next to an
    "of" would be empty (leading, trailing or adjacent "of").

    @param oia_graph: graph to rewrite in place
    @type oia_graph: OIAGraph
    @param kwargs: ignored
    @type kwargs: dict
    @return: None
    @rtype: None
    """
    for node in list(oia_graph.nodes()):
        node_words = oia_graph.node_text(node).split(" ")

        if "of" not in node_words:
            continue
        if len(node_words) == 1:
            # the node is just "of"
            continue

        # Alternating layout: [words, of_idx, words, of_idx, ..., words]
        of_split_words = []
        current_words = []
        for span in node.spans:
            if isinstance(span, str):
                current_words.append(span)
                continue
            start, end = span
            for idx in range(start, end + 1):
                if oia_graph.words[idx] == "of":
                    of_split_words.append(current_words)
                    of_split_words.append(idx)
                    current_words = []
                else:
                    current_words.append(idx)

        if not of_split_words:
            # "of" appears in the text but not as a token inside an integer
            # span, so there is nothing to split; keep the node as it is.
            continue

        if not current_words:
            # of is the ending, warning, maybe something like "because of "
            logger.warning("We found a of at the last of the phrase: " +
                           oia_graph.node_text(node))
            continue
        of_split_words.append(current_words)

        if any(not part for part in of_split_words[::2]):
            # A leading "of" or two adjacent "of" tokens would create a node
            # from an empty word list; skip instead of building it.
            logger.warning("We found an empty part around of in the phrase: " +
                           oia_graph.node_text(node))
            continue

        first_node = oia_graph.add_words(of_split_words[0])

        for parent, edge in list(oia_graph.parents(node)):
            oia_graph.add_relation(parent, first_node, edge.label)
            oia_graph.remove_relation(parent, node)

        children = list(oia_graph.children(node))
        if children:
            logger.warning(
                "noun of noun has {0} children, be careful!!!".format(
                    len(children)))
        for child, edge in children:
            logger.warning("Child: {} {}".format(
                edge.label, oia_graph.node_text(child)))
            oia_graph.add_relation(first_node, child, edge.label)
            oia_graph.remove_relation(node, child)

        oia_graph.remove_node(node)

        # The remainder is a sequence of (of_idx, words) pairs.
        previous_node = first_node
        for of_word, noun_words in more_itertools.chunked(
                of_split_words[1:], 2):
            of_node = oia_graph.add_words([of_word])
            next_node = oia_graph.add_words(noun_words)
            oia_graph.add_relation(previous_node, of_node, "as:pred.arg.1")
            oia_graph.add_relation(of_node, next_node, "pred.arg.2")
            previous_node = next_node
def forward(self, oia_graph: OIAGraph, **kwargs):
    """
    Replace selected auxiliary nodes by a direct, labelled edge.

    Handles ``OIAAuxNode`` nodes labelled VOC, APPOS, DISCOURSE, REPARANDUM,
    TOPIC or TIME_IN. Each such node is removed and its two arguments are
    connected with the edge label given by the mapping below:

    * two children (``pred.arg.1`` and ``pred.arg.2``): add
      ``arg1 -[label]-> arg2`` and re-attach every parent of the auxiliary
      node to ``arg1``;
    * one ``pred.arg.1`` child: the other argument is the parent attached via
      ``as:pred.arg.2``; add ``arg2 -[as:label]-> arg1``;
    * one ``pred.arg.2`` child: the other argument is the parent attached via
      ``as:pred.arg.1``; add ``arg1 -[label]-> arg2``.

    @param oia_graph: graph to rewrite in place
    @type oia_graph: OIAGraph
    @param kwargs: ignored
    @type kwargs: dict
    @return: None
    @rtype: None
    @raise AssertionError: when an auxiliary node does not have the expected
        number of children or matching parents
    @raise Exception: when the single child uses an edge label other than
        ``pred.arg.1`` / ``pred.arg.2``
    """
    node_edge_mapping = {
        "VOC": "vocative",
        "APPOS": "appos",
        "DISCOURSE": "discourse",
        "REPARANDUM": "reparandum",
        "TOPIC": "topic",
        "TIME_IN": "mod",
    }

    for node in list(oia_graph.nodes()):
        # The mapping keys are exactly the auxiliary labels handled here.
        if not (isinstance(node, OIAAuxNode)
                and node.label in node_edge_mapping):
            continue

        edge_label = node_edge_mapping[node.label]
        children = list(oia_graph.children(node))
        parents = list(oia_graph.parents(node))

        assert 0 < len(children) <= 2

        if len(children) == 2:
            arg1 = [
                child for child, edge in children
                if edge.label == "pred.arg.1"
            ]
            arg2 = [
                child for child, edge in children
                if edge.label == "pred.arg.2"
            ]
            assert len(arg1) == 1 and len(arg2) == 1
            arg1 = arg1[0]
            arg2 = arg2[0]

            oia_graph.add_relation(arg1, arg2, edge_label)
            for parent, edge in parents:
                oia_graph.add_relation(parent, arg1, edge.label)
            oia_graph.remove_node(node)
            continue

        child, edge = children[0]
        if edge.label == "pred.arg.1":
            arg1 = child
            # Compare the edge's label, as every other branch does; comparing
            # the edge object itself to a string would never match unless the
            # edge type defines equality with str.
            arg2 = [p for p, l in parents if l.label == "as:pred.arg.2"]
            assert len(arg2) == 1, [l.label for p, l in parents]
            arg2 = arg2[0]
            oia_graph.add_relation(arg2, arg1, "as:" + edge_label)
            oia_graph.remove_node(node)
        elif edge.label == "pred.arg.2":
            arg2 = child
            arg1 = [p for p, l in parents if l.label == "as:pred.arg.1"]
            assert len(arg1) == 1, [l.label for p, l in parents]
            arg1 = arg1[0]
            oia_graph.add_relation(arg1, arg2, edge_label)
            oia_graph.remove_node(node)
        else:
            raise Exception("Unknow edges: {}".format(edge.label))
def _dep_nodes_by_spans(dep_graph, spans):
    """Return the result of ``dep_graph.get_node_by_spans(spans)`` as a list.

    A falsy result gives an empty list, a single ``DependencyGraphNode`` gives
    a one-element list and a list is copied. Any other type is logged as an
    error and treated as empty.
    """
    found = dep_graph.get_node_by_spans(spans)
    if not found:
        return []
    if isinstance(found, DependencyGraphNode):
        return [found]
    if isinstance(found, list):
        return list(found)
    logger.error("get_node_by_spans return type unknown.")
    return []


def single_root(dep_graph: DependencyGraph, oia_graph: OIAGraph,
                context: UD2OIAContext):
    """
    Pick one root among the zero-in-degree nodes of the OIA graph and reverse
    edges so that the other zero-in-degree nodes hang below it.

    Root selection, when several nodes have in-degree zero:

    1. keep the candidates whose related dependency nodes (the node's own and
       those of its word children) are closest to the first child of
       dependency node "0" in the undirected dependency graph;
    2. on a tie, score each candidate (a ``func`` child edge, punctuation
       found between its children, ``IMPORTANT_CONNECTION_WORDS`` lemmas,
       PARATAXIS label) and keep the best ones;
    3. on a further tie, either take the candidate whose dependency node has
       the smallest ``LOC`` (two candidates), or merge the candidates into a
       new PARATAXIS node when there are three or more and all of them are
       PARATAXIS auxiliary nodes.

    The function returns early, leaving the graph as it is at that point, when
    the tie cannot be resolved. A candidate without any related dependency
    node gets an infinite distance, and the punctuation score is skipped for a
    candidate without dependency children.

    :param dep_graph: dependency graph of the sentence (read only)
    :param oia_graph: OIA graph, modified in place
    :param context: not used by this function
    :return: None
    """
    in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                  for node in oia_graph.nodes()]
    zero_degree_nodes = [n for n, degree in in_degrees if degree == 0]

    if len(zero_degree_nodes) == 0:
        return
    elif len(zero_degree_nodes) == 1:
        root = zero_degree_nodes[0]
    else:
        # len(zero_degree_nodes) >= 2
        # These do not depend on the candidate, so compute them once instead
        # of once per candidate / per path query.
        dep_root = dep_graph.get_node("0")
        real_dep_root = next(n for n, l in dep_graph.children(dep_root))
        undirected_dep = dep_graph.g.to_undirected()

        dists_to_root = []
        for oia_node in zero_degree_nodes:
            related_dep_nodes = set()
            if isinstance(oia_node, OIAWordsNode):
                related_dep_nodes.update(
                    _dep_nodes_by_spans(dep_graph, oia_node.spans))

            children = [n for n, l in oia_graph.children(oia_node)]
            for child in children:
                if isinstance(child, OIAWordsNode):
                    related_dep_nodes.update(
                        _dep_nodes_by_spans(dep_graph, child.spans))

            # The path is measured in nodes (len of the node list). A
            # candidate with no related dependency node ranks last instead of
            # making min() fail on an empty sequence.
            min_dist_to_root = min(
                (len(nx.shortest_path(undirected_dep, real_dep_root.ID,
                                      dep_node.ID))
                 for dep_node in related_dep_nodes),
                default=float("inf"))
            dists_to_root.append((oia_node, min_dist_to_root))

        dists_to_root.sort(key=lambda x: x[1])
        min_dist = dists_to_root[0][1]
        root_candidates = [
            oia_node for oia_node, dist in dists_to_root if dist == min_dist
        ]

        if len(root_candidates) == 1:
            root = root_candidates[0]
        else:
            scores = []
            score_map = {":": 40, "\"": 30, ";": 20, ",": 10, "(": -10}

            for cand in root_candidates:
                score = -100
                if any("func" in rel.label
                       for n, rel in oia_graph.children(cand)):
                    score = 100

                children = [n for n, l in oia_graph.children(cand)]
                dep_children = []
                for child in children:
                    if isinstance(child, OIAWordsNode):
                        dep_children.extend(
                            _dep_nodes_by_spans(dep_graph, child.spans))

                # check what is between the first and last child
                dep_children.sort(key=lambda x: x.LOC)
                if dep_children:
                    first_loc = dep_children[0].LOC
                    last_loc = dep_children[-1].LOC
                    for node in dep_graph.nodes():
                        if node.LOC is None:
                            continue
                        if (first_loc < node.LOC < last_loc
                                and node.FORM in score_map):
                            score = max(score, score_map[node.FORM])

                if isinstance(cand, OIAWordsNode):
                    for dep_node in _dep_nodes_by_spans(dep_graph,
                                                        cand.spans):
                        if dep_node.LEMMA in IMPORTANT_CONNECTION_WORDS:
                            score += 8
                elif isinstance(cand, OIAAuxNode) and cand.label == "PARATAXIS":
                    score += 4

                scores.append((cand, score))

            scores.sort(key=lambda x: x[1], reverse=True)
            best_score = scores[0][1]
            top_nodes = [node for node, score in scores
                         if score == best_score]

            if len(top_nodes) == 1:
                root = top_nodes[0]

            elif len(top_nodes) >= 3:
                # multiple top nodes found, merge them into one
                if all(
                        isinstance(node, OIAAuxNode)
                        and node.label == "PARATAXIS"
                        for node in top_nodes):
                    next_nodes = []
                    for top in top_nodes:
                        for n, l in list(oia_graph.children(top)):
                            next_nodes.append(n)
                        oia_graph.remove_node(top)
                        # Rebuild the list rather than removing from it while
                        # iterating over it.
                        zero_degree_nodes = [
                            node for node in zero_degree_nodes
                            if node.ID != top.ID
                        ]

                    root = oia_graph.add_aux("PARATAXIS")
                    oia_graph.add_node(root)
                    next_nodes.sort(key=lambda x: x.ID)
                    # NOTE(review): argument indices start at 0 here, whereas
                    # edge labels elsewhere start at pred.arg.1 - confirm
                    # against add_argument.
                    for index, second_node in enumerate(next_nodes):
                        oia_graph.add_argument(root, second_node, index)
                else:
                    logger.error(
                        "Deep intersection point, currently cannot process")
                    return

            else:
                # len(top_nodes) == 2:
                # check who is prev, and who is next
                dep_tops = []
                for top in top_nodes:
                    if isinstance(top, OIAWordsNode):
                        for dep_node in _dep_nodes_by_spans(dep_graph,
                                                            top.spans):
                            dep_tops.append((top, dep_node))

                if not dep_tops:
                    logger.error("Multiple AUX head ")
                    return

                dep_tops.sort(key=lambda x: x[1].LOC)
                root = dep_tops[0][0]

    # root obtained, change other zero-in-degree node
    logger.debug("Root obtained ")
    logger.debug(root)

    for node in zero_degree_nodes:
        if root.ID == node.ID:
            continue

        if is_conj_node(node, dep_graph):
            # Reverse every pred.arg.N edge: child -[as:pred.arg.N]-> node
            for child, rel in list(oia_graph.children(node)):
                label = rel.label
                if "pred.arg." in label:
                    arg_no = label.split(".")[-1]
                    new_rel = "as:pred.arg." + arg_no
                    oia_graph.remove_relation(node, child)
                    oia_graph.add_relation(child, node, new_rel)
            continue

        ref_childs = [
            child for child, rel in oia_graph.children(node)
            if rel.label == "ref"
        ]
        for child in ref_childs:
            oia_graph.remove_relation(node, child)
            oia_graph.add_relation(child, node, "as:ref")

    in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                  for node in oia_graph.nodes()]
    zero_degree_nodes = [
        n for n, degree in in_degrees if degree == 0 and n.ID != root.ID
    ]

    while len(zero_degree_nodes) > 0:

        logger.debug("we found zero_degree_nodes: ")
        for node in zero_degree_nodes:
            logger.debug(node)

        root_offsprings = set(oia_graph.offsprings(root))
        logger.debug("root offsprings :")
        for n in root_offsprings:
            logger.debug(n)

        intersections = []
        for node in zero_degree_nodes:
            node_offspring = set(oia_graph.offsprings(node))
            logger.debug("node offsprings :")
            for n in node_offspring:
                logger.debug(n)

            intersection = root_offsprings.intersection(node_offspring)
            logger.debug("we found {0} initial intersection :".format(
                len(intersection)))
            for n in intersection:
                logger.debug(n)

            if not intersection:
                continue

            # The top intersection point is a shared node none of whose
            # parents is itself shared.
            top_intersection_point = None
            parents_to_root = None
            parents_to_other = None
            for x in intersection:
                parents = {n for n, l in oia_graph.parents(x)}
                if not parents.intersection(intersection):
                    top_intersection_point = x
                    parents_to_root = parents.intersection(root_offsprings)
                    parents_to_other = parents.intersection(node_offspring)
                    break

            if top_intersection_point is None:
                logger.error("It seems we have a problem ")
                continue

            logger.debug("we found a intersections: ")
            logger.debug(top_intersection_point)
            logger.debug("Its parents to root: ")
            for x in parents_to_root:
                logger.debug(x)
            logger.debug("Its parents to other: ")
            for x in parents_to_other:
                logger.debug(x)

            intersections.append((top_intersection_point, parents_to_root,
                                  parents_to_other))

        if len(intersections) == 0:
            logger.error("seems we have disconnected compoenent")
            break

        for intersection_point, parents_to_root, parents_to_other in intersections:
            for node in parents_to_other:
                is_aux = isinstance(node, OIAAuxNode)

                if is_aux and node.label == "WHETHER":
                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for WHETHER "
                        )
                    # Route the root-side parents through the WHETHER node.
                    for parent in parents_to_root:
                        relation = oia_graph.get_edge(parent,
                                                      intersection_point)
                        oia_graph.remove_relation(parent, intersection_point)
                        oia_graph.add_relation(parent, node, relation.label)
                    continue

                if is_aux and node.label == "LIST":
                    logger.error("lets see what happens for LIST")
                    if len(list(oia_graph.parents(node))) != 0:
                        logger.error(
                            "it seems different with what we have thought for LIST "
                        )

                # Reverse the edge: intersection_point -[as:label]-> node
                relation = oia_graph.get_edge(node, intersection_point)
                oia_graph.remove_relation(node, intersection_point)
                oia_graph.add_relation(intersection_point, node,
                                       "as:" + relation.label)

        in_degrees = [(node, oia_graph.g.in_degree(node.ID))
                      for node in oia_graph.nodes()]
        zero_degree_nodes = [
            n for n, degree in in_degrees
            if degree == 0 and n.ID != root.ID
        ]
def forward(self, oia_graph: OIAGraph, dep_graph: DependencyGraph=None, **kwargs):
    """
    Split nodes whose text contains "and" / "or" into a conjunction node with
    one argument node per conjunct.

    Only nodes whose space-separated text contains a lowercase "and" or "or"
    and no ``{...}`` placeholder word are processed. The words between the
    conjunction tokens become argument nodes attached via ``pred.arg.N``
    (N starting at 1). With a single conjunction the new predicate node is
    that token; with several, it is built as ``{1} conj {2} conj {3} ...``.
    Parents and children of the original node are moved to the conjunction
    node and the original node is removed.

    Nodes are skipped when no conjunction token is found inside the integer
    spans, or when every argument is empty or made only of ``, ; .`` or
    blank tokens.

    @param oia_graph: graph to rewrite in place
    @type oia_graph: OIAGraph
    @param dep_graph: not used by this method
    @type dep_graph: DependencyGraph
    @param kwargs: ignored
    @type kwargs: dict
    @return: None
    @rtype: None
    """
    conj_tokens = {"and", "or"}
    filler_tokens = {",", ";", ".", " "}

    def _token_text(item):
        # Argument lists mix word indices with literal string spans.
        return item if isinstance(item, str) else oia_graph.words[item]

    for node in list(oia_graph.nodes()):
        node_words = oia_graph.node_text(node).split(" ")

        # NOTE(review): this pre-filter is case-sensitive while the split
        # below lowercases tokens - confirm which one is intended.
        if not any(x in conj_tokens for x in node_words):
            continue
        if any("{" in x and "}" in x for x in node_words):
            continue

        arguments = []
        conjs = []
        current_words = []
        for span in node.spans:
            if isinstance(span, str):
                current_words.append(span)
                continue
            start, end = span
            for idx in range(start, end + 1):
                if oia_graph.words[idx].lower() in conj_tokens:
                    arguments.append(current_words)
                    conjs.append(idx)
                    current_words = []
                else:
                    current_words.append(idx)
        arguments.append(current_words)

        logger.debug("conj found = {}".format(conjs))
        logger.debug("argument found = {}".format(arguments))

        if not conjs:
            # The conjunction only occurs in a string span, so nothing was
            # split; do not build a placeholder-only "{1}" node.
            continue

        if all(not arg
               or all(_token_text(x) in filler_tokens for x in arg)
               for arg in arguments):
            # single words
            continue

        if len(conjs) == 1:
            conj_words = conjs
        else:
            # len(conjs) >= 2:
            logger.warning("We are processing conjs with more than two args")
            conj_words = ['{1}']
            for idx, conj in enumerate(conjs):
                conj_words.append(conj)
                conj_words.append("{{{0}}}".format(idx + 2))

        conj_node = oia_graph.add_words(conj_words)

        # NOTE(review): an empty argument (conjunction at the start or end of
        # the phrase) is still passed to add_words - confirm this is intended.
        for idx, arg in enumerate(arguments):
            arg_node = oia_graph.add_words(arg)
            oia_graph.add_relation(conj_node, arg_node,
                                   "pred.arg.{0}".format(idx + 1))

        for parent, edge in list(oia_graph.parents(node)):
            oia_graph.add_relation(parent, conj_node, edge.label)
            oia_graph.remove_relation(parent, node)

        for child, edge in list(oia_graph.children(node)):
            oia_graph.add_relation(conj_node, child, edge.label)
            oia_graph.remove_relation(node, child)

        oia_graph.remove_node(node)