def parse_string(self, s, concepts=True):
    """
    Parse the string s and return a new abstract meaning representation.

    The tokens yielded by C{self.lexer.lex(s)} drive a small state machine.
    Open nodes, pending edges and finished edge targets are kept on an
    explicit stack; whenever a closing parenthesis is read, the edges
    collected for the innermost open node are added to the graph.

    @param s: the string to parse.
    @param concepts: if True, concept labels are recorded in the
        C{node_to_concepts} mapping of the returned L{Hgraph}.
    @return: the L{Hgraph} built from s.
    @raise ParserError: if a token is not allowed in the current state.
    """
    # Tags of the tuples kept on the parse stack.
    PNODE = 1  # (PNODE, label, concept): node whose '(' is still open
    CNODE = 2  # (CNODE, label, concept): finished target of the pending edge
    EDGE = 3   # (EDGE, label): edge still collecting its targets

    amr = Hgraph()
    stack = []

    def unexpected(token, pos):
        """Build the error for a token that is illegal in the current state."""
        return ParserError(
            "Unexpected token %s at position %i." % (token, pos))

    def strip_external(label):
        """Drop a leading '@' from label and register it as external node."""
        if label[0] == '@':
            label = label[1:]
            amr.external_nodes.append(label)
        return label

    def finish_node(label, concept):
        """Attach a closed node to its parent edge or make it a root.

        Returns the next parser state.
        """
        if stack:
            # Nested expression: it becomes a target of the enclosing edge.
            stack.append((CNODE, label, concept))
            return 6
        amr.roots.append(label)
        return 0

    def close_bare_node(record_concept):
        """Close a node that has no outgoing edges; return the next state."""
        tag, label, concept = stack.pop()
        assert tag == PNODE
        label = strip_external(label)
        # Lookup kept only for its effect ("add only the node" in the
        # original) -- presumably Hgraph creates missing nodes on access.
        amr[label]
        if record_concept and concepts and (
                label not in amr.node_to_concepts or concept is not None):
            amr.node_to_concepts[label] = concept
        return finish_node(label, concept)

    def close_node_with_edges():
        """Pop all pending edges of the innermost open node and add them.

        Returns the next parser state.
        """
        edges = []
        while stack[-1][0] != PNODE:  # Pop all edges
            targets = []
            while stack[-1][0] == CNODE:  # Pop all targets of this edge
                _, child, child_concept = stack.pop()
                if child is not None and child[0] == '@':
                    # child is external node
                    child = child[1:]
                    amr.external_nodes.append(child)
                targets.append((child, child_concept))
            assert stack[-1][0] == EDGE
            _, edgelabel = stack.pop()
            edges.append((edgelabel, targets))

        _, label, concept = stack.pop()
        # Strip the external marker *before* recording the concept so the
        # concept is stored under the same key that is used as node id.
        label = strip_external(label)
        if concepts and (
                label not in amr.node_to_concepts or concept is not None):
            amr.node_to_concepts[label] = concept

        # NOTE(review): edges and their targets are processed in pop order,
        # i.e. the reverse of the order in which they appear in s. Confirm
        # that this is the intended order for hyperedge targets.
        for edgelabel, targets in edges:
            hypertarget = []  # build hyperedge destination
            for node, node_concept in targets:
                if node is None:
                    # Placeholder pushed for an edge without any target.
                    continue
                if concepts and (node not in amr.node_to_concepts or
                                 node_concept is not None):
                    amr.node_to_concepts[node] = node_concept
                hypertarget.append(node)
            if edgelabel[0] == '#':  # this is a nonterminal edge
                edgelabel = NonterminalLabel(edgelabel[1:])
            amr._add_triple(label, edgelabel, tuple(hypertarget))

        return finish_node(label, concept)

    # Parser states:
    #   0  top level, expecting '('
    #   1  expecting source node name
    #   2  expecting '/', an edge label or ')'
    #   3  expecting concept name
    #   4  expecting an edge label or ')'
    #   5  expecting an edge target: '(' expression, node name, literal
    #      string, quantity or special symbol; an edge label or ')' here
    #      means the pending edge has no target
    #   6  expecting ')', ',' (more targets) or another edge label
    #   7  after ',': expecting the next target (node name or '(')
    state = 0

    for toktype, token, pos in self.lexer.lex(s):

        if state == 0:
            if toktype == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

        elif state == 1:
            if toktype == LexTypes.IDENTIFIER:
                stack.append((PNODE, token, None))  # Push source node
                state = 2
            else:
                raise unexpected(token, pos)

        elif state == 2:
            if toktype == LexTypes.SLASH:
                state = 3
            elif toktype == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            elif toktype == LexTypes.RPAR:
                # Node without concept: nothing is written to
                # node_to_concepts on this path.
                state = close_bare_node(False)
            else:
                raise unexpected(token, pos)

        elif state == 3:
            if toktype == LexTypes.IDENTIFIER:
                assert stack[-1][0] == PNODE
                nodelabel = stack.pop()[1]
                # Re-push the source node, now with its concept label
                stack.append((PNODE, nodelabel, token))
                state = 4
            else:
                raise unexpected(token, pos)

        elif state == 4:
            if toktype == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            elif toktype == LexTypes.RPAR:
                state = close_bare_node(True)
            else:
                raise unexpected(token, pos)

        elif state == 5:
            if toktype == LexTypes.LPAR:
                state = 1
            elif toktype == LexTypes.QUANTITY:
                stack.append((CNODE, Quantity(token), None))
                state = 6
            elif toktype == LexTypes.STRLITERAL:
                # token[1:-1] drops the first and last character of the token
                stack.append((CNODE, StrLiteral(token[1:-1]), None))
                state = 6
            elif toktype == LexTypes.LITERAL:
                stack.append((CNODE, Literal(token[1:]), None))
                state = 6
            elif toktype == LexTypes.IDENTIFIER:
                stack.append((CNODE, token, None))  # Push target node
                state = 6
            elif toktype == LexTypes.EDGELABEL:  # Unary edge
                stack.append((CNODE, None, None))
                stack.append((EDGE, token[1:]))
                state = 5
            elif toktype == LexTypes.RPAR:  # Unary edge
                stack.append((CNODE, None, None))
                state = close_node_with_edges()
            else:
                raise unexpected(token, pos)

        elif state == 6:
            if toktype == LexTypes.RPAR:  # Pop from stack and add edges
                state = close_node_with_edges()
            elif toktype == LexTypes.COMMA:
                state = 7
            elif toktype == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            else:
                raise unexpected(token, pos)

        elif state == 7:
            if toktype == LexTypes.IDENTIFIER:
                stack.append((CNODE, token, None))  # Push next target node
                state = 6
            elif toktype == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

    # NOTE(review): input that ends while nodes are still open (non-empty
    # stack) is not reported; the graph built so far is returned as is.
    return amr
def parse_string(self, s, concepts=True):
    """
    Parse the string s and return a new abstract meaning representation.

    The tokens yielded by C{self.lexer.lex(s)} drive a small state machine.
    Open nodes, pending edges and finished edge targets are kept on an
    explicit stack; whenever a closing parenthesis is read, the edges
    collected for the innermost open node are added to the graph.

    @param s: the string to parse.
    @param concepts: if True, concept labels are recorded in the
        C{node_to_concepts} mapping of the returned L{Hgraph}.
    @return: the L{Hgraph} built from s.
    @raise ParserError: if a token is not allowed in the current state.
    """
    # Tags of the tuples kept on the parse stack.
    PNODE = 1  # (PNODE, label, concept): node whose '(' is still open
    CNODE = 2  # (CNODE, label, concept): finished target of the pending edge
    EDGE = 3   # (EDGE, label): edge still collecting its targets

    amr = Hgraph()
    stack = []

    def unexpected(token, pos):
        """Build the error for a token that is illegal in the current state."""
        return ParserError(
            "Unexpected token %s at position %i." % (token, pos))

    def strip_external(label):
        """Drop a leading '@' from label and register it as external node."""
        if label[0] == '@':
            label = label[1:]
            amr.external_nodes.append(label)
        return label

    def finish_node(label, concept):
        """Attach a closed node to its parent edge or make it a root.

        Returns the next parser state.
        """
        if stack:
            # Nested expression: it becomes a target of the enclosing edge.
            stack.append((CNODE, label, concept))
            return 6
        amr.roots.append(label)
        return 0

    def close_bare_node(record_concept):
        """Close a node that has no outgoing edges; return the next state."""
        tag, label, concept = stack.pop()
        assert tag == PNODE
        label = strip_external(label)
        # Lookup kept only for its effect ("add only the node" in the
        # original) -- presumably Hgraph creates missing nodes on access.
        amr[label]
        if record_concept and concepts and (
                label not in amr.node_to_concepts or concept is not None):
            amr.node_to_concepts[label] = concept
        return finish_node(label, concept)

    def close_node_with_edges():
        """Pop all pending edges of the innermost open node and add them.

        Returns the next parser state.
        """
        edges = []
        while stack[-1][0] != PNODE:  # Pop all edges
            targets = []
            while stack[-1][0] == CNODE:  # Pop all targets of this edge
                _, child, child_concept = stack.pop()
                if child is not None and child[0] == '@':
                    # child is external node
                    child = child[1:]
                    amr.external_nodes.append(child)
                targets.append((child, child_concept))
            assert stack[-1][0] == EDGE
            _, edgelabel = stack.pop()
            edges.append((edgelabel, targets))

        _, label, concept = stack.pop()
        # Strip the external marker *before* recording the concept so the
        # concept is stored under the same key that is used as node id.
        label = strip_external(label)
        if concepts and (
                label not in amr.node_to_concepts or concept is not None):
            amr.node_to_concepts[label] = concept

        # NOTE(review): edges and their targets are processed in pop order,
        # i.e. the reverse of the order in which they appear in s. Confirm
        # that this is the intended order for hyperedge targets.
        for edgelabel, targets in edges:
            hypertarget = []  # build hyperedge destination
            for node, node_concept in targets:
                if node is None:
                    # Placeholder pushed for an edge without any target.
                    continue
                if concepts and (node not in amr.node_to_concepts or
                                 node_concept is not None):
                    amr.node_to_concepts[node] = node_concept
                hypertarget.append(node)
            if edgelabel[0] == '#':  # this is a nonterminal edge
                edgelabel = NonterminalLabel(edgelabel[1:])
            amr._add_triple(label, edgelabel, tuple(hypertarget))

        return finish_node(label, concept)

    # Parser states:
    #   0  top level, expecting '('
    #   1  expecting source node name
    #   2  expecting '/', an edge label or ')'
    #   3  expecting concept name
    #   4  expecting an edge label or ')'
    #   5  expecting an edge target: '(' expression, node name, literal
    #      string, quantity or special symbol; an edge label or ')' here
    #      means the pending edge has no target
    #   6  expecting ')', ',' (more targets) or another edge label
    #   7  after ',': expecting the next target (node name or '(')
    state = 0

    for toktype, token, pos in self.lexer.lex(s):

        if state == 0:
            if toktype == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

        elif state == 1:
            if toktype == LexTypes.IDENTIFIER:
                stack.append((PNODE, token, None))  # Push source node
                state = 2
            else:
                raise unexpected(token, pos)

        elif state == 2:
            if toktype == LexTypes.SLASH:
                state = 3
            elif toktype == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            elif toktype == LexTypes.RPAR:
                # Node without concept: nothing is written to
                # node_to_concepts on this path.
                state = close_bare_node(False)
            else:
                raise unexpected(token, pos)

        elif state == 3:
            if toktype == LexTypes.IDENTIFIER:
                assert stack[-1][0] == PNODE
                nodelabel = stack.pop()[1]
                # Re-push the source node, now with its concept label
                stack.append((PNODE, nodelabel, token))
                state = 4
            else:
                raise unexpected(token, pos)

        elif state == 4:
            if toktype == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            elif toktype == LexTypes.RPAR:
                state = close_bare_node(True)
            else:
                raise unexpected(token, pos)

        elif state == 5:
            if toktype == LexTypes.LPAR:
                state = 1
            elif toktype == LexTypes.QUANTITY:
                stack.append((CNODE, Quantity(token), None))
                state = 6
            elif toktype == LexTypes.STRLITERAL:
                # token[1:-1] drops the first and last character of the token
                stack.append((CNODE, StrLiteral(token[1:-1]), None))
                state = 6
            elif toktype == LexTypes.LITERAL:
                stack.append((CNODE, Literal(token[1:]), None))
                state = 6
            elif toktype == LexTypes.IDENTIFIER:
                stack.append((CNODE, token, None))  # Push target node
                state = 6
            elif toktype == LexTypes.EDGELABEL:  # Unary edge
                stack.append((CNODE, None, None))
                stack.append((EDGE, token[1:]))
                state = 5
            elif toktype == LexTypes.RPAR:  # Unary edge
                stack.append((CNODE, None, None))
                state = close_node_with_edges()
            else:
                raise unexpected(token, pos)

        elif state == 6:
            if toktype == LexTypes.RPAR:  # Pop from stack and add edges
                state = close_node_with_edges()
            elif toktype == LexTypes.COMMA:
                state = 7
            elif toktype == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            else:
                raise unexpected(token, pos)

        elif state == 7:
            if toktype == LexTypes.IDENTIFIER:
                stack.append((CNODE, token, None))  # Push next target node
                state = 6
            elif toktype == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

    # NOTE(review): input that ends while nodes are still open (non-empty
    # stack) is not reported; the graph built so far is returned as is.
    return amr