def parse_string(self, s, concepts=True):
    """
    Parse the string s and return a new hypergraph.

    The tokens produced by ``self.lexer.lex(s)`` are consumed by a small
    finite state machine that keeps parent nodes, edge labels and child
    nodes on an explicit stack. Whenever a parenthesised group is closed,
    its edges are popped from the stack and added to the graph.

    Parameters:
        s: the graph description to parse.
        concepts: not used by this method; kept so existing callers that
            pass it keep working.

    Returns:
        The populated Hgraph. Its external nodes are renumbered 0..n-1 in
        the order of the external ids found in the input.

    Raises:
        ParserError: on a token that is not allowed in the current state
            (including any token after the root has been completed), or
            when one node is given two different external ids.

    Side effects:
        Resets the id counters, ``seen_nodes`` and ``explicit_ext_ids`` on
        self, and adds the re-entrance indexes found in s to
        ``self.reentrance_indexes``.
    """
    # Constants to identify items on the stack
    PNODE = 1  # Parent node
    CNODE = 2  # Child node
    EDGE = 3   # Hyperedge label

    # States of the finite state parser
    # 0: top level
    # 1: saw a left paren, expecting the head node name
    # 2: saw the head node, expecting edge label, child, nested group
    #    or right paren
    # 3: saw a child node, expecting further children, an edge label,
    #    a nested group or right paren
    # 4: saw an edge label or closed a nested group, expecting child
    #    node, edge label, nested group or right paren
    # 5: root is complete, every further token is an error
    hgraph = Hgraph()
    stack = []
    state = 0

    self.id_count = 0
    self.nt_id_count = 0
    self.ext_id_count = 0
    self.seen_nodes = set()
    self.explicit_ext_ids = False

    def unexpected(token, pos):
        # Build the error for a token the current state does not accept.
        return ParserError(
            "Unexpected token %s at position %i." % (token, pos))

    def insert_node(node, root=False):
        # Insert a node, given as (ident, label, ext_id), into the graph.
        ident, label, ext_id = node
        _ = hgraph[ident]  # Initialize dictionary for this node
        hgraph.node_to_concepts[ident] = label
        if ext_id is not None:
            known = hgraph.external_nodes
            if ident in known and known[ident] != ext_id:
                raise ParserError(
                    "Incompatible external node IDs for node %s." % ident)
            hgraph.external_nodes[ident] = ext_id
            hgraph.rev_external_nodes[ext_id] = ident
        if root:
            hgraph.roots.append(ident)

    def pop_and_transition():
        # Create all edges of the group on top of the stack, attach them
        # to the graph and return the state the parser must move to.
        #
        # The state is returned rather than assigned here: an assignment
        # to `state` inside this helper would only bind a new local name
        # and leave the parser's state untouched.
        edges = []
        while stack[-1][0] != PNODE:  # Pop all edges
            children = []
            while stack[-1][0] == CNODE:  # Pop all nodes in hyperedge
                _itemtype, child = stack.pop()
                insert_node(child)
                children.append(child)
            assert stack[-1][0] == EDGE
            _itemtype, edgelabel = stack.pop()
            edges.append((edgelabel, children))

        # Construct the hyperedges
        _itemtype, parentnode = stack.pop()
        parent_ident = parentnode[0]
        for edgelabel, children in edges:
            # Children were popped last-to-first; restore input order.
            hyperchild = tuple(
                ident for ident, _label, _ext_id in reversed(children))

            if "$" in edgelabel:  # this is a nonterminal edge
                new_edge = NonterminalLabel.from_string(edgelabel)
                if not new_edge.index:
                    new_edge.index = "_%i" % self.nt_id_count
                    self.nt_id_count = self.nt_id_count + 1
            else:
                new_edge = edgelabel
            hgraph._add_triple(parent_ident, new_edge, hyperchild)

        if stack:
            # The closed group is itself a child of an enclosing group.
            insert_node(parentnode)
            stack.append((CNODE, parentnode))
            return 4
        insert_node(parentnode, root=True)
        return 5

    # Collect re-entrance indexes: "_<digits>" followed by "." and
    # preceded by any character other than ":".
    # self.reentrance_indexes is not created here; it must already exist.
    self.reentrance_indexes.update(re.findall(r'[^:](_[0-9]+)\.', s))

    # Parser transitions start here
    for typ, token, pos in self.lexer.lex(s):
        if state == 0:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.NODE:
                insert_node(self.parse_node(token), root=True)
                state = 5
            else:
                raise unexpected(token, pos)

        elif state == 1:
            if typ == LexTypes.NODE:
                # Push head node
                stack.append((PNODE, self.parse_node(token)))
                state = 2
            else:
                raise unexpected(token, pos)

        elif state == 2:
            if typ == LexTypes.EDGELABEL:
                # The first character of the token is dropped
                # (presumably the label prefix -- confirm with the lexer).
                stack.append((EDGE, token[1:]))
                state = 4
            elif typ == LexTypes.NODE:
                # No edge specified, assume empty label
                stack.append((EDGE, ""))
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.LPAR:
                # No edge specified, assume empty label
                stack.append((EDGE, ""))
                state = 1
            elif typ == LexTypes.RPAR:
                # Group with a head node only, no edges to build.
                itemtype, node = stack.pop()
                assert itemtype == PNODE
                if stack:
                    insert_node(node)
                    stack.append((CNODE, node))
                    state = 3
                else:
                    insert_node(node, root=True)
                    state = 5
            else:
                raise unexpected(token, pos)

        elif state == 3:
            if typ == LexTypes.RPAR:
                # Pop from stack and add edges
                state = pop_and_transition()
            elif typ == LexTypes.NODE:
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 4
            elif typ == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

        elif state == 4:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.NODE:
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
            elif typ == LexTypes.RPAR:
                # Pop from stack and add edges
                state = pop_and_transition()
            else:
                raise unexpected(token, pos)

        elif state == 5:
            raise unexpected(token, pos)

    # NOTE(review): input that ends before the root group is closed is not
    # reported; the graph built so far is returned as is.

    # Normalize external nodes: renumber them 0..n-1, ordered by the
    # external ids read from the input.
    new_ext_nodes = {}
    new_rev_ext_nodes = {}
    by_ext_id = sorted(hgraph.external_nodes.items(),
                       key=lambda item: item[1])
    for new_id, (node, _old_id) in enumerate(by_ext_id):
        new_ext_nodes[node] = new_id
        new_rev_ext_nodes[new_id] = node

    hgraph.external_nodes = new_ext_nodes
    hgraph.rev_external_nodes = new_rev_ext_nodes
    return hgraph
def parse_string(self, s, concepts=True):
    """
    Parse the string s and return a new hypergraph.

    Tokens from ``self.lexer.lex(s)`` drive a finite state machine. Parent
    nodes, edge labels and child nodes are kept on an explicit stack; each
    time a parenthesised group is closed its edges are popped and added to
    the graph.

    Parameters:
        s: the graph description to parse.
        concepts: not used by this method; kept so existing callers that
            pass it keep working.

    Returns:
        The populated Hgraph, with external nodes renumbered 0..n-1 in the
        order of the external ids found in the input.

    Raises:
        ParserError: on a token that is not allowed in the current state
            (including any token after the root has been completed), or
            when one node is given two different external ids.

    Side effects:
        Resets the id counters, ``seen_nodes`` and ``explicit_ext_ids`` on
        self.
    """
    # Constants to identify items on the stack
    PNODE = 1  # Parent node
    CNODE = 2  # Child node
    EDGE = 3   # Hyperedge label

    # States of the finite state parser
    # 0: top level
    # 1: saw a left paren, expecting the head node name
    # 2: saw the head node, expecting edge label, child, nested group
    #    or right paren
    # 3: saw a child node, expecting further children, an edge label,
    #    a nested group or right paren
    # 4: saw an edge label or closed a nested group, expecting child
    #    node, edge label, nested group or right paren
    # 5: root is complete, every further token is an error
    hgraph = Hgraph()
    stack = []
    state = 0

    self.id_count = 0
    self.nt_id_count = 0
    self.ext_id_count = 0
    self.seen_nodes = set()
    self.explicit_ext_ids = False

    def unexpected(token, pos):
        # Build the error for a token the current state does not accept.
        return ParserError(
            "Unexpected token %s at position %i." % (token, pos))

    def insert_node(node, root=False):
        # Insert a node, given as (ident, label, ext_id), into the graph.
        ident, label, ext_id = node
        _ = hgraph[ident]  # Initialize dictionary for this node
        hgraph.node_to_concepts[ident] = label
        if ext_id is not None:
            previous = hgraph.external_nodes.get(ident, ext_id) \
                if ident in hgraph.external_nodes else ext_id
            if previous != ext_id:
                raise ParserError(
                    "Incompatible external node IDs for node %s." % ident)
            hgraph.external_nodes[ident] = ext_id
            hgraph.rev_external_nodes[ext_id] = ident
        if root:
            hgraph.roots.append(ident)

    def pop_and_transition():
        # Create all edges of the group on top of the stack, attach them
        # to the graph and return the state the parser must move to.
        #
        # The next state is a return value on purpose: assigning `state`
        # in this nested function would create a local variable and the
        # parser loop would never see the transition.
        edges = []
        while stack[-1][0] != PNODE:  # Pop all edges
            children = []
            while stack[-1][0] == CNODE:  # Pop all nodes in hyperedge
                _itemtype, child = stack.pop()
                insert_node(child)
                children.append(child)
            assert stack[-1][0] == EDGE
            _itemtype, edgelabel = stack.pop()
            edges.append((edgelabel, children))

        # Construct the hyperedges
        _itemtype, parentnode = stack.pop()
        parent_ident = parentnode[0]
        for edgelabel, children in edges:
            # Children come off the stack in reverse; undo that here.
            hyperchild = tuple(
                ident for ident, _label, _ext_id in reversed(children))

            if "$" in edgelabel:  # this is a nonterminal edge
                new_edge = NonterminalLabel.from_string(edgelabel)
                if not new_edge.index:
                    new_edge.index = "_%i" % self.nt_id_count
                    self.nt_id_count = self.nt_id_count + 1
            else:
                new_edge = edgelabel
            hgraph._add_triple(parent_ident, new_edge, hyperchild)

        if stack:
            # The closed group is itself a child of an enclosing group.
            insert_node(parentnode)
            stack.append((CNODE, parentnode))
            return 4
        insert_node(parentnode, root=True)
        return 5

    # Parser transitions start here
    for typ, token, pos in self.lexer.lex(s):
        if state == 0:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.NODE:
                insert_node(self.parse_node(token), root=True)
                state = 5
            else:
                raise unexpected(token, pos)

        elif state == 1:
            if typ == LexTypes.NODE:
                # Push head node
                stack.append((PNODE, self.parse_node(token)))
                state = 2
            else:
                raise unexpected(token, pos)

        elif state == 2:
            if typ == LexTypes.EDGELABEL:
                # The first character of the token is dropped
                # (presumably the label prefix -- confirm with the lexer).
                stack.append((EDGE, token[1:]))
                state = 4
            elif typ == LexTypes.NODE:
                # No edge specified, assume empty label
                stack.append((EDGE, ""))
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.LPAR:
                # No edge specified, assume empty label
                stack.append((EDGE, ""))
                state = 1
            elif typ == LexTypes.RPAR:
                # Group with a head node only, no edges to build.
                itemtype, node = stack.pop()
                assert itemtype == PNODE
                if stack:
                    insert_node(node)
                    stack.append((CNODE, node))
                    state = 3
                else:
                    insert_node(node, root=True)
                    state = 5
            else:
                raise unexpected(token, pos)

        elif state == 3:
            if typ == LexTypes.RPAR:
                # Pop from stack and add edges
                state = pop_and_transition()
            elif typ == LexTypes.NODE:
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 4
            elif typ == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

        elif state == 4:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.NODE:
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
            elif typ == LexTypes.RPAR:
                # Pop from stack and add edges
                state = pop_and_transition()
            else:
                raise unexpected(token, pos)

        elif state == 5:
            raise unexpected(token, pos)

    # NOTE(review): input that ends before the root group is closed is not
    # reported; the graph built so far is returned as is.

    # Normalize external nodes: renumber them 0..n-1, ordered by the
    # external ids read from the input.
    new_ext_nodes = {}
    new_rev_ext_nodes = {}
    by_ext_id = sorted(hgraph.external_nodes.items(),
                       key=lambda item: item[1])
    for new_id, (node, _old_id) in enumerate(by_ext_id):
        new_ext_nodes[node] = new_id
        new_rev_ext_nodes[new_id] = node

    hgraph.external_nodes = new_ext_nodes
    hgraph.rev_external_nodes = new_rev_ext_nodes
    return hgraph