def combine_by_postion(graph: DependencyGraph):
    """Merge a head token with a nearby tail token into one vertex.

    Each entry of ``combine_by_position_mapping`` is unpacked as
    ``(head token, tail token, start, end)``.  For every vertex carrying the
    head token, the vertices whose original index lies at an offset in
    ``range(start, end)`` from the head are inspected; when one carries the
    tail token, the head is renamed to ``"<head> <tail>"`` and the tail vertex
    is removed with ``remove_vertex_with_child_promote``.
    """
    for head_token, tail_token, first_offset, stop_offset in combine_by_position_mapping:
        # Collect the original indices first and look each head up again by
        # index (presumably because removals below invalidate older handles).
        anchor_indices = [
            vertex["orig_index"]
            for vertex in graph.get_verticies_by_token(head_token)
        ]
        for anchor_index in anchor_indices:
            head = graph.get_vertex_by_orig_index(anchor_index)
            if not head:
                continue
            for offset in range(first_offset, stop_offset):
                neighbour = graph.get_vertex_by_orig_index(head["orig_index"] + offset)
                if not neighbour or neighbour["token"] != tail_token:
                    continue
                head["token"] = head_token + " " + tail_token
                graph.remove_vertex_with_child_promote(neighbour)
def graph_combine(graph: DependencyGraph):
    """Repeatedly combine edges whose (head, tail) tokens appear in ``combine_mapping``.

    An edge qualifies when its token pair is a key of ``combine_mapping`` and
    the mapped label list is either empty or contains the edge's label.  After
    each ``graph.combine`` call the edge scan restarts from scratch; the
    function returns once a full scan finds nothing to combine.
    """
    while True:
        for edge in graph.get_edges():
            head, tail = graph.get_verticies(edge)
            token_pair = (head["token"], tail["token"])
            if token_pair not in combine_mapping:
                continue
            allowed_labels = combine_mapping[token_pair]
            if allowed_labels == [] or graph.get_label(edge) in allowed_labels:
                graph.combine(edge)
                # The edge set changed; rescan from the beginning.
                break
        else:
            # A complete pass without any combination: nothing left to do.
            return
def remove_tokens(graph: DependencyGraph, edges, tokens, tail_only=False):
    """Return the edges from *edges* that survive the token and parent checks.

    An edge ``(head, tail)`` is dropped when:

    * the tail's vertex label is in *tokens*, or, unless *tail_only* is set,
      the head's vertex label is in *tokens*; or
    * the head has no parents and the tail is not a new expression
      (per ``is_new_expression``).

    The input order of the surviving edges is preserved.
    """
    kept = []
    for edge in edges:
        head_id, tail_id = edge

        head_is_listed = (not tail_only) and graph.get_vertex_lable(head_id) in tokens
        if head_is_listed or graph.get_vertex_lable(tail_id) in tokens:
            continue

        if not graph.get_parents(head_id) and not is_new_expression(graph, tail_id):
            continue

        kept.append(edge)
    return kept
def transform_graph(graph: DependencyGraph, root, context):
    """Recursively rotate edges above vertices whose token is in ``rotate_up_tokens``.

    The children of *root* are recorded (by original index) before any
    rotation.  If *root*'s token is a rotate-up token and it has at least one
    parent, the edge from its first parent is rotated.  Each recorded child is
    then visited with the context produced by ``get_next_context`` for its
    position among the children.
    """
    # Snapshot the children before rotating so the traversal follows the
    # structure as it was on entry.
    child_orig_indices = [child["orig_index"] for child in graph.get_children(root)]

    if root["token"] in rotate_up_tokens:
        parents = graph.get_parents(root.index)
        if parents:
            graph.rotate_edge((parents[0], root.index))

    for position, child_orig_index in enumerate(child_orig_indices):
        child = graph.get_vertex_by_orig_index(child_orig_index)
        child_context = get_next_context(context, root["token"], position)
        transform_graph(graph, child, child_context)
def filter_graph(graph: DependencyGraph, root, context):
    """Post-order removal of vertices that ``filter_mapping`` rejects in *context*.

    Children are processed first, each with the context returned by
    ``get_next_context`` for its position.  Afterwards *root* itself is removed
    (promoting its children) when its token is a key of ``filter_mapping`` and
    *context* is not among the contexts listed for that token.
    """
    token = root["token"]
    root_orig_index = root["orig_index"]

    child_orig_indices = [child["orig_index"] for child in graph.get_children(root)]
    for position, child_orig_index in enumerate(child_orig_indices):
        child = graph.get_vertex_by_orig_index(child_orig_index)
        filter_graph(graph, child, get_next_context(context, token, position))

    # Look the root up again: deleting vertices during the recursion above
    # invalidates the handle that was passed in.
    root = graph.get_vertex_by_orig_index(root_orig_index)
    assert root["token"] == token, "token changed: " + token

    if token not in filter_mapping:
        return
    if context not in filter_mapping[token]:
        graph.remove_vertex_with_child_promote(root)
def filter_if_both_aritmetic(graph: DependencyGraph, edges):
    """Return *edges* without those whose head and tail tokens are both in ``aritmetic_strings``.

    The relative order of the remaining edges is unchanged.
    """
    def joins_two_arithmetic_tokens(edge):
        head, tail = graph.get_verticies(edge)
        return head["token"] in aritmetic_strings and tail["token"] in aritmetic_strings

    return [edge for edge in edges if not joins_two_arithmetic_tokens(edge)]
def graph_break_2(graph: DependencyGraph):
    """Break the graph at the ``ccomp`` edges that pass a chain of filters.

    Starting from all edges of type ``"ccomp"``, candidates are narrowed by
    ``remove_tokens`` (twice), ``filter_if_both_aritmetic`` and
    ``filter_out_double_edge``.  Every remaining edge is passed to
    ``break_graph``; order enforcement on the split is enabled only when
    ``edge_joins_identical_tokens`` reports true for that edge.
    """
    candidates = graph.get_edges_with_type("ccomp")

    # Tokens that rule an edge out when they label either end of it.
    candidates = remove_tokens(graph, candidates, ["by", "than", "to"])
    # Tokens that rule an edge out only when they label the tail.
    candidates = remove_tokens(graph, candidates, ["number"], tail_only=True)

    candidates = filter_if_both_aritmetic(graph, candidates)
    candidates = filter_out_double_edge(graph, candidates)

    for edge in candidates:
        break_graph(
            graph,
            edge,
            enforce_order_on_split=edge_joins_identical_tokens(graph, edge),
        )
def get_rejected_root(graph: DependencyGraph, vertex, root1, root2):
    """Pick which of two roots should lose its connection to *vertex*.

    If exactly one root carries a control keyword token (``while``, ``for``,
    ``if``, ``else``, ``return``), the other root is rejected.  Otherwise the
    root farther from *vertex* (per ``graph.get_distance``) is rejected, and
    ``root2`` is rejected when the distances are equal.
    """
    control_keywords = ("while", "for", "if", "else", "return")
    root1_is_keyword = root1["token"] in control_keywords
    root2_is_keyword = root2["token"] in control_keywords

    # Exactly one keyword root: reject the non-keyword one.
    if root1_is_keyword != root2_is_keyword:
        return root2 if root1_is_keyword else root1

    distance1 = graph.get_distance(root1, vertex)
    distance2 = graph.get_distance(root2, vertex)
    # Farther root is rejected; ties default to root2.
    return root1 if distance1 > distance2 else root2
def get_dependency_graph(labels, dependencies):
    """Build and post-process a ``DependencyGraph`` from parser output.

    The graph is constructed from ``pronoun_resolution(labels)``.  Every
    non-``None`` entry of *dependencies* contributes an edge from its
    ``head_position`` to its ``dependent_position`` (both converted with
    ``int``), labelled with its ``type``.  The tokens are then printed, and
    ``transform_dependency_graph`` and ``combine_by_postion`` are applied
    before the graph is returned.
    """
    dg = DependencyGraph(pronoun_resolution(labels))

    for dep in dependencies:
        # Identity test: an entry with a custom __eq__ must not be mistaken
        # for a missing one.
        if dep is None:
            continue
        edge = (int(dep.head_position), int(dep.dependent_position))
        dg.add_edge(edge, dep.type)

    dg.print_tokens()
    transform_dependency_graph(dg)
    combine_by_postion(dg)
    return dg
def graph_break_3(graph: DependencyGraph):
    """Make the graph a forest: no vertex reachable from two roots, no cycles.

    For every unordered pair of roots whose transitive closures overlap, the
    shared vertex closest to the first root is located, one of the two roots
    is chosen by ``get_rejected_root``, and the last edge on the path from
    that root to the shared vertex is removed.  Finally ``remove_cycles`` is
    run from every root of the resulting graph.
    """
    roots = graph.get_roots()
    # Closures are computed once up front and are not refreshed as edges are
    # removed below.
    reachable = [set(graph.transitive_closure(root, [])) for root in roots]

    # Visit each unordered pair of roots exactly once (i < j).
    for i, root1 in enumerate(roots):
        for j, root2 in enumerate(roots):
            if j <= i:
                continue
            shared = reachable[i] & reachable[j]
            if not shared:
                continue
            shared = list(shared)
            # The closest vertex is common to both roots.
            vertex = graph.get_closest_to_root(root1, shared)
            rejected = get_rejected_root(graph, vertex, root1, root2)
            path = graph.get_path(rejected, vertex)
            # Cut the edge formed by the final two vertices of the path.
            graph.remove_edge((path[-1].index, path[-2].index))

    for root in graph.get_roots():
        graph.remove_cycles(root)
def graph_break_1(graph: DependencyGraph):
    """Split the graph at every edge whose dependent token starts a new tree.

    The tokens ``return``, ``for``, ``while``, ``if``, ``else`` and ``set``
    are handled in that order.  For each token the matching edges are fetched
    from the graph as it stands at that moment and passed to ``break_graph``
    with order enforcement enabled.
    """
    # Order matters: the edges for a token are queried only after the splits
    # for all earlier tokens have been applied.
    new_tree_tokens = ("return", "for", "while", "if", "else", "set")
    for token in new_tree_tokens:
        for edge in graph.get_edges_with_dependent_token(token):
            break_graph(graph, edge, True)
def is_new_expression(graph: DependencyGraph, vertex_id):
    """Return True when the token at *vertex_id* is one that starts a new expression."""
    new_expression_tokens = ("plus equal+s",)
    token = graph.get_token(vertex_id)
    return token in new_expression_tokens
def break_graph(graph: DependencyGraph, edge, enforce_order_on_split):
    """Remove *edge* and, optionally, re-home neighbours to respect token order.

    *edge* is a ``(head, dependent)`` pair of vertex indices; the dependent
    becomes the root of the split-off part.  When *enforce_order_on_split* is
    false only the edge removal happens.  Otherwise:

    * if the head comes after the dependent, every edge from a parent of the
      head is replaced by an edge from the dependent to the head carrying the
      same label, and the head's first parent (if any) stands in for the head
      in the next step;
    * every child of the dependent with a smaller index than the dependent is
      moved, label preserved, from the dependent onto that head.
    """
    graph.remove_edge(edge)
    if not enforce_order_on_split:
        return

    anchor, new_root = edge[0], edge[1]

    if anchor > new_root:
        # Head comes after the dependent: hang it beneath the dependent.
        anchor_parents = graph.get_parents(anchor)
        for parent in anchor_parents:
            old_edge = (parent, anchor)
            label = graph.get_label(old_edge)
            graph.add_edge((new_root, anchor), label)
            graph.remove_edge(old_edge)
        if anchor_parents:
            anchor = anchor_parents[0]

    for child in graph.get_children_indices(new_root):
        if child >= new_root:
            continue
        # Child precedes the new root: attach it to the original parent side.
        old_edge = (new_root, child)
        label = graph.get_label(old_edge)
        graph.add_edge((anchor, child), label)
        graph.remove_edge(old_edge)