def rhs_to_hgraph(self):
    """Build an ``Hgraph`` from the right-hand side of this rule.

    Every node in ``self.rhs.nodes`` is inserted under the identifier
    ``"_" + node.name`` with an empty concept label. A node that also occurs
    in ``self.lhs.nodes`` is recorded as an external node whose id is its
    position in that list; the node at position 0 is additionally appended
    to the graph's roots.

    Every edge in ``self.rhs.edges`` becomes a triple whose source is the
    edge's first node and whose target is the tuple of its remaining nodes.
    A label that contains no underscore and starts with neither ``ARG`` nor
    ``BV`` is wrapped in a ``NonterminalLabel``; when that label has no
    index, it is given a fresh ``"_<n>"`` index, counting from 0.

    Returns:
        The populated ``Hgraph``.

    Raises:
        Exception: if a node is already registered as external under a
            different id.
    """
    from common.cfg import NonterminalLabel
    from common.hgraph.hgraph import Hgraph

    graph = Hgraph()

    for node in self.rhs.nodes:  # type: GraphNode
        try:
            external_id = self.lhs.nodes.index(node)
        except ValueError:
            # Not an LHS node, hence not external.
            external_id = None
        name = "_" + node.name

        # The lookup result is unused; it is kept for its side effect of
        # initializing the graph's entry for this node.
        _ = graph[name]
        graph.node_to_concepts[name] = ""

        if external_id is None:
            continue

        already_external = name in graph.external_nodes
        if already_external and graph.external_nodes[name] != external_id:
            raise Exception(
                "Incompatible external node IDs for node %s." % name)
        graph.external_nodes[name] = external_id
        graph.rev_external_nodes[external_id] = name
        if external_id == 0:
            graph.roots.append(name)

    fresh_index = 0
    for edge in self.rhs.edges:  # type: HyperEdge
        targets = tuple("_" + tail.name for tail in edge.nodes[1:])
        source = "_" + edge.nodes[0].name

        is_terminal = (
            "_" in edge.label
            or edge.label.startswith("ARG")
            or edge.label.startswith("BV")
        )
        if is_terminal:
            edge_label = edge.label
        else:
            # This is a nonterminal edge.
            edge_label = NonterminalLabel(edge.label)
            if not edge_label.index:
                edge_label.index = "_%i" % fresh_index
                fresh_index += 1

        graph._add_triple(source, edge_label, targets)

    return graph
def parse_string(s):
    """
    Parse the RHS of a CFG rule.

    The string is split on whitespace. A token containing ``"$"`` is turned
    into a ``NonterminalLabel`` via ``NonterminalLabel.from_string``; if the
    resulting label has no index it is assigned ``"_<n>"``, where ``n``
    counts the labels indexed this way so far (starting at 0). Every other
    token is kept as a plain string.

    Returns a list of strings and ``NonterminalLabel`` objects in the order
    the tokens appear.
    """
    parsed = []
    fresh_index = 0
    for token in s.strip().split():
        if "$" not in token:
            # Terminal symbol: keep the raw token.
            parsed.append(token)
            continue

        nonterminal = NonterminalLabel.from_string(token)
        if not nonterminal.index:
            nonterminal.index = "_%i" % fresh_index
            fresh_index += 1
        parsed.append(nonterminal)
    return parsed
def make_rule(frontier_pair, amr, tree, align, next_index):
    """
    Creates a new rule with the given parts, and collapses these parts in the
    original graph and tree.

    Args:
        frontier_pair: a ``(constituent, amr_fragment)`` pair.
        amr: the graph that ``amr_fragment`` is collapsed in.
        tree: the tree that ``constituent`` is collapsed in.
        align: the alignments, rewritten via ``collapse_alignments``.
        next_index: index given to the new nonterminal label and stored on
            the rule as ``original_index``.

    Returns:
        A tuple ``(rule, new_amr, new_tree, new_alignments, next_index + 1)``.

    Raises:
        ValueError: if the fragment's root edge carries a
            ``NonterminalLabel`` whose symbol does not have the
            ``<role>_<...>_<digits>`` shape, or if a plain root label
            contains more than one ``':'``.

    Side effects:
        ``amr_fragment.external_nodes`` is overwritten.
    """
    constituent, amr_fragment = frontier_pair

    # The role comes from the label on the fragment's first root edge.
    root_label = amr_fragment.root_edges()[0][1]
    if isinstance(root_label, NonterminalLabel):
        match = re.match(r'(.+)_(.+)_(\d+)', root_label.label)
        if match is None:
            # re.match returns None on failure; fail with a clear message
            # instead of an AttributeError on ``None.group``.
            raise ValueError(
                "Cannot extract a role from nonterminal symbol %r"
                % (root_label.label,))
        role = match.group(1)
    elif ':' in root_label:
        # Only the part before the colon is needed.
        role, _concept = root_label.split(':')
    else:
        role = root_label

    external_nodes = amr.find_external_nodes(amr_fragment)
    if not external_nodes:
        # No external nodes found: fall back to the fragment's first leaf.
        external_nodes = [amr_fragment.find_leaves()[0]]
    # WARNING: destructive. Unfortunately we can't make the change any earlier.
    # TODO why?
    amr_fragment.external_nodes = external_nodes

    symbol = '%s_%s_%d' % (role, constituent.node, len(external_nodes))
    label = NonterminalLabel(symbol, next_index)

    # The triple that must stand in for the fragment after collapsing.
    new_triple = (amr_fragment.roots[0], label, tuple(external_nodes))
    new_amr = amr.collapse_fragment(amr_fragment, label)
    assert new_triple in new_amr.triples()
    new_tree = collapse_constituent(tree, constituent, label)
    new_alignments = collapse_alignments(align, amr_fragment, new_triple)

    rule = Rule(0, symbol, 1, amr_fragment, constituent,
                original_index=next_index)

    return rule, new_amr, new_tree, new_alignments, next_index + 1
def pop_and_transition():
    """Pop one parent node with its edge group off ``stack`` and attach it.

    Stack items are ``(tag, payload)`` pairs. Starting from the top, runs of
    ``CNODE`` items followed by one ``EDGE`` item are popped until a
    ``PNODE`` item is on top; each popped child node is passed to
    ``insert_node``. The ``PNODE`` item is then popped and, for every
    collected edge, a triple from the parent's identifier to the tuple of
    child identifiers is added to ``hgraph``. An edge label containing
    ``"$"`` is parsed with ``NonterminalLabel.from_string`` and, if it has
    no index, receives ``"_<self.nt_id_count>"`` (the counter is then
    incremented).

    If items remain on the stack the parent is inserted and pushed back as a
    ``CNODE``; otherwise it is inserted as a root.
    """
    # Gather (edge label, child nodes) groups until the parent marker shows up.
    pending = []
    while stack[-1][0] != PNODE:
        tail_nodes = []
        while stack[-1][0] == CNODE:
            kind, child = stack.pop()
            insert_node(child)
            tail_nodes.append(child)
        assert stack[-1][0] == EDGE
        kind, raw_label = stack.pop()
        pending.append((raw_label, tail_nodes))

    kind, parentnode = stack.pop()

    for raw_label, tail_nodes in pending:
        # Children were collected in pop order; the tail uses the reverse.
        tail = tuple(
            child_ident
            for child_ident, _child_label, _child_ext in reversed(tail_nodes)
        )

        if "$" not in raw_label:
            edge_label = raw_label
        else:
            # This is a nonterminal edge.
            edge_label = NonterminalLabel.from_string(raw_label)
            if not edge_label.index:
                edge_label.index = "_%i" % self.nt_id_count
                self.nt_id_count += 1

        parent_ident, _parent_label, _parent_ext = parentnode
        hgraph._add_triple(parent_ident, edge_label, tail)

    # NOTE(review): no nonlocal/global declaration is visible, so ``state``
    # below is a local and these assignments do not reach the enclosing
    # scope. Confirm how the caller performs the FSA transition.
    if stack:
        insert_node(parentnode)
        stack.append((CNODE, parentnode))
        state = 4
    else:
        insert_node(parentnode, root=True)
        state = 5
def pop_and_transition():
    """Pop one parent node with its edge group off ``stack`` and attach it.

    Stack items are ``(tag, payload)`` pairs. Starting from the top, runs of
    ``CNODE`` items followed by one ``EDGE`` item are popped until a
    ``PNODE`` item is on top; each popped child node is passed to
    ``insert_node``. The ``PNODE`` item is then popped and, for every
    collected edge, a triple from the parent's identifier to the tuple of
    child identifiers is added to ``hgraph``. An edge label containing
    ``"$"`` is parsed with ``NonterminalLabel.from_string`` and, if it has
    no index, receives ``"_<self.nt_id_count>"`` (the counter is then
    incremented).

    If items remain on the stack the parent is inserted and pushed back as a
    ``CNODE``; otherwise it is inserted as a root.
    """
    # Gather (edge label, child nodes) groups until the parent marker shows up.
    pending = []
    while stack[-1][0] != PNODE:
        tail_nodes = []
        while stack[-1][0] == CNODE:
            kind, child = stack.pop()
            insert_node(child)
            tail_nodes.append(child)
        assert stack[-1][0] == EDGE
        kind, raw_label = stack.pop()
        pending.append((raw_label, tail_nodes))

    kind, parentnode = stack.pop()

    for raw_label, tail_nodes in pending:
        # Children were collected in pop order; the tail uses the reverse.
        tail = tuple(
            child_ident
            for child_ident, _child_label, _child_ext in reversed(tail_nodes)
        )

        if "$" not in raw_label:
            edge_label = raw_label
        else:
            # This is a nonterminal edge.
            edge_label = NonterminalLabel.from_string(raw_label)
            if not edge_label.index:
                edge_label.index = "_%i" % self.nt_id_count
                self.nt_id_count += 1

        parent_ident, _parent_label, _parent_ext = parentnode
        hgraph._add_triple(parent_ident, edge_label, tail)

    # NOTE(review): no nonlocal/global declaration is visible, so ``state``
    # below is a local and these assignments do not reach the enclosing
    # scope. Confirm how the caller performs the FSA transition.
    if stack:
        insert_node(parentnode)
        stack.append((CNODE, parentnode))
        state = 4
    else:
        insert_node(parentnode, root=True)
        state = 5
def convert_chart(partition, external_nodes, nt, first=False):
    """Recursively turn one chart item into rules and return its fragment id.

    Relies on names from the enclosing scope: ``seen`` (partition ->
    fragment id memo), ``result`` (fragment id -> list of
    ``(rule_id, children)`` plus ``result.use_counts``), ``chart``,
    ``graph_edge_list``, ``node_order``, ``fragment_counter`` (a one-element
    list used as a mutable counter) and ``self.add_rule``.

    Args:
        partition: the chart item to convert. Its ``edges`` attribute is
            read as a 0/1 mask over ``graph_edge_list``; ``str_start`` and
            ``str_end`` delimit an inclusive range of tree leaves.
        external_nodes: assigned to the ``external_nodes`` attribute of
            every graph built for this partition.
        nt: nonterminal label; only ``nt.label`` is used (the index is
            dropped).
        first: unused by this function; kept for interface compatibility.

    Returns:
        The fragment id under which this partition's rule alternatives are
        stored in ``result``. A partition that was converted before returns
        its existing id and only has its use count incremented.
    """
    nt = NonterminalLabel(nt.label)  # Get rid of the index

    if partition in seen:
        node = seen[partition]
        result.use_counts[node] += 1
        return node

    def order_of(node_id):
        # Sort key replacing the old ``cmp``-style lambda, which Python 3's
        # list.sort() rejects. Ordering is unchanged.
        # NOTE(review): a key function is evaluated for every element, even
        # in a one-element list - assumes every node is in node_order.
        return node_order[node_id]

    def selected_edges(part):
        # Index-based on purpose: only len() and [] are known to be
        # supported by ``part.edges``.
        return [
            graph_edge_list[i] for i in range(len(part.edges))
            if part.edges[i] == 1
        ]

    def graph_with_sorted_roots(triples):
        # Build a graph from triples with its roots ordered by node_order.
        graph = Hgraph.from_triples(triples, {}, warn=False)
        graph.roots = graph.find_roots()
        graph.roots.sort(key=order_of)
        return graph

    def register(alternatives):
        # Store the alternatives under a fresh fragment id and memoize it.
        fragment = fragment_counter[0]
        result[fragment] = alternatives
        result.use_counts[fragment] += 1
        seen[partition] = fragment
        fragment_counter[0] += 1
        return fragment

    leaves = chart.tree.leaves()
    edges_in_partition = selected_edges(partition)

    if partition not in chart:  # leaf
        graph = graph_with_sorted_roots(edges_in_partition)
        graph.external_nodes = external_nodes
        str_rhs = [
            leaves[i]
            for i in range(partition.str_start, partition.str_end + 1)
        ]
        rule = Rule(0, nt.label, graph, tuple(str_rhs), 1)
        rule_id = self.add_rule(rule)
        return register([(rule_id, [])])

    poss = []
    for possibility in chart[partition]:
        # This is the parent graph; sub-constituents are collapsed into it.
        partition_graph = graph_with_sorted_roots(edges_in_partition)
        partition_graph.external_nodes = external_nodes
        children = []
        spans_to_nt = {}

        index = 1
        for subpartition in possibility:  # The different sub-constituents
            edges_in_subpartition = selected_edges(subpartition)
            # Some constituents do not have any edges aligned to them; they
            # add no child and no nonterminal but still consume an index.
            if edges_in_subpartition:
                sub_graph = graph_with_sorted_roots(edges_in_subpartition)
                external_node_list = partition_graph.find_external_nodes2(
                    sub_graph)
                external_node_list.sort(key=order_of)
                # Map each external node to its position in sorted order.
                sub_external_nodes = dict(
                    (k, v) for v, k in enumerate(external_node_list))
                sub_graph.external_nodes = sub_external_nodes
                sub_nt = NonterminalLabel(
                    "%s%i" % (subpartition.phrase, len(sub_external_nodes)),
                    index)
                children.append(
                    convert_chart(subpartition, sub_external_nodes,
                                  sub_nt))  # Recursive call
                partition_graph = partition_graph.collapse_fragment2(
                    sub_graph, sub_nt, external=external_node_list,
                    warn=False)
                spans_to_nt[subpartition.str_start] = (
                    sub_nt, subpartition.str_end)
            index += 1

        # Collapsing may have changed the roots; recompute and re-sort them.
        partition_graph.roots = partition_graph.find_roots()
        partition_graph.roots.sort(key=order_of)

        # Assemble String rule
        str_rhs = []
        i = partition.str_start
        while i <= partition.str_end:
            if i in spans_to_nt:
                # Emit the nonterminal and jump to the end of its span.
                new_nt, i = spans_to_nt[i]
                str_rhs.append(new_nt)
            else:
                str_rhs.append(leaves[i])
            i = i + 1

        rule = Rule(0, nt.label, partition_graph, tuple(str_rhs), 1)
        rule_id = self.add_rule(rule)
        poss.append((rule_id, children))

    return register(poss)