Example #1
0
    def parse_string(self, s, concepts=True):
        """
        Parse the string s and return a new abstract meaning representation.

        Tokens produced by self.lexer.lex(s) drive a small state machine.
        Open nodes, edge labels and edge targets are kept on an explicit
        stack until the closing parenthesis of the enclosing node is seen,
        at which point the collected edges are added to the graph.

        @param s: the string to parse.
        @param concepts: if True, method returns an L{Hgraph} object
            containing concept labels.
        @return: the L{Hgraph} built from s.
        @raise ParserError: if a token is not allowed in the current state.
        """

        # Markers for the kind of entry on the parse stack:
        #   (PNODE, label, concept)  node whose parenthesis is still open
        #   (CNODE, label, concept)  target of the edge label below it
        #   (EDGE, label)            edge label waiting for its targets
        PNODE = 1
        CNODE = 2
        EDGE = 3

        amr = Hgraph()
        stack = []
        state = 0

        #0, top level: expecting left paren
        #1, expecting source nodename
        #2, expecting slash (concept follows), edge label or right paren
        #3, expecting concept name
        #4, expecting edge label or right paren
        #5, expecting expression, node name or literal string, quantity or special symbol
        #6, expecting right paren, comma or another edge label
        #7, after a comma: expecting one more target (node name or expression)

        def unexpected(token, pos):
            """Build the error for a token that is not allowed in this state."""
            return ParserError(
                "Unexpected token %s at position %i." % (token, pos))

        def strip_external(label):
            """Drop a leading '@' from label and record the node as external."""
            if label is not None and label[0] == '@':
                label = label[1:]
                amr.external_nodes.append(label)
            return label

        def set_concept(node, concept):
            """Record concept for node; None never overwrites an existing entry."""
            if concepts and (node not in amr.node_to_concepts
                             or concept is not None):
                amr.node_to_concepts[node] = concept

        def finish_node(label, concept):
            """Hand a closed node to its parent edge or make it a root.

            Returns the next parser state.
            """
            if stack:
                # Still inside another node: this one is an edge target.
                stack.append((CNODE, label, concept))
                return 6
            amr.roots.append(label)
            return 0

        def close_leaf(register_concept):
            """Close a node that has no outgoing edges; return next state."""
            kind, label, concept = stack.pop()
            assert kind == PNODE
            label = strip_external(label)
            # Original comment: "add only the node". The lookup is kept for
            # its side effect -- presumably Hgraph creates the entry on
            # access; TODO confirm against Hgraph.
            amr[label]
            if register_concept:
                set_concept(label, concept)
            return finish_node(label, concept)

        def close_edges():
            """Pop all edges of the innermost open node and add them to amr.

            Returns the next parser state.
            """
            edges = []
            while stack[-1][0] != PNODE:  # Pop all edges
                children = []
                # Pop all targets of this (hyper)edge. They come off the
                # stack in reverse input order and are stored that way.
                # NOTE(review): confirm the reversed target order is intended.
                while stack[-1][0] == CNODE:
                    _, child, childconcept = stack.pop()
                    children.append((strip_external(child), childconcept))

                assert stack[-1][0] == EDGE
                _, edgelabel = stack.pop()
                edges.append((edgelabel, children))

            _, parent, parentconcept = stack.pop()
            # Strip the '@' marker *before* recording the concept so that the
            # concept is keyed by the same label the node carries in the
            # graph (this is what the edge-less case and the children do).
            parent = strip_external(parent)
            set_concept(parent, parentconcept)

            for edgelabel, children in edges:
                hypertarget = []  # build hyperedge destination
                for node, concept in children:
                    if node is not None:  # None marks a unary edge
                        set_concept(node, concept)
                        hypertarget.append(node)

                if edgelabel[0] == '#':  # this is a nonterminal Edge
                    edgelabel = NonterminalLabel(edgelabel[1:])

                amr._add_triple(parent, edgelabel, tuple(hypertarget))

            return finish_node(parent, parentconcept)

        for toktype, token, pos in self.lexer.lex(s):

            if state == 0:
                if toktype == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

            elif state == 1:
                if toktype == LexTypes.IDENTIFIER:
                    stack.append((PNODE, token, None))  # Push source node
                    state = 2
                else:
                    raise unexpected(token, pos)

            elif state == 2:
                if toktype == LexTypes.SLASH:
                    state = 3
                elif toktype == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif toktype == LexTypes.RPAR:
                    # Node without concept and without edges.
                    state = close_leaf(False)
                else:
                    raise unexpected(token, pos)

            elif state == 3:
                if toktype == LexTypes.IDENTIFIER:
                    assert stack[-1][0] == PNODE
                    nodelabel = stack.pop()[1]
                    # Re-push the source node, now with its concept label
                    stack.append((PNODE, nodelabel, token))
                    state = 4
                else:
                    raise unexpected(token, pos)

            elif state == 4:
                if toktype == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif toktype == LexTypes.RPAR:
                    # Node with a concept but without edges.
                    state = close_leaf(True)
                else:
                    raise unexpected(token, pos)

            elif state == 5:
                if toktype == LexTypes.LPAR:
                    state = 1
                elif toktype == LexTypes.QUANTITY:
                    stack.append((CNODE, Quantity(token), None))
                    state = 6
                elif toktype == LexTypes.STRLITERAL:
                    # token[1:-1] drops the first and last character
                    stack.append((CNODE, StrLiteral(token[1:-1]), None))
                    state = 6
                elif toktype == LexTypes.LITERAL:
                    stack.append((CNODE, Literal(token[1:]), None))
                    state = 6
                elif toktype == LexTypes.IDENTIFIER:
                    # Push target node referenced by name (no concept)
                    stack.append((CNODE, token, None))
                    state = 6
                elif toktype == LexTypes.EDGELABEL:  # Unary edge
                    stack.append((CNODE, None, None))
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif toktype == LexTypes.RPAR:  # Unary edge
                    stack.append((CNODE, None, None))
                    state = close_edges()
                else:
                    raise unexpected(token, pos)

            elif state == 6:
                if toktype == LexTypes.RPAR:  # Pop from stack and add edges
                    state = close_edges()
                elif toktype == LexTypes.COMMA:
                    state = 7
                elif toktype == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                else:
                    raise unexpected(token, pos)

            elif state == 7:
                if toktype == LexTypes.IDENTIFIER:
                    # Push one more target node for the current hyperedge
                    stack.append((CNODE, token, None))
                    state = 6
                elif toktype == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

        return amr
    def parse_string(self, s, concepts=True):
        """
        Parse the string s and return a new abstract meaning representation.

        Tokens produced by self.lexer.lex(s) drive a small state machine.
        Open nodes, edge labels and edge targets are kept on an explicit
        stack until the closing parenthesis of the enclosing node is seen,
        at which point the collected edges are added to the graph.

        @param s: the string to parse.
        @param concepts: if True, method returns an L{Hgraph} object
            containing concept labels.
        @return: the L{Hgraph} built from s.
        @raise ParserError: if a token is not allowed in the current state.
        """

        # Markers for the kind of entry on the parse stack:
        #   (PNODE, label, concept)  node whose parenthesis is still open
        #   (CNODE, label, concept)  target of the edge label below it
        #   (EDGE, label)            edge label waiting for its targets
        PNODE = 1
        CNODE = 2
        EDGE = 3

        amr = Hgraph()
        stack = []
        state = 0

        #0, top level: expecting left paren
        #1, expecting source nodename
        #2, expecting slash (concept follows), edge label or right paren
        #3, expecting concept name
        #4, expecting edge label or right paren
        #5, expecting expression, node name or literal string, quantity or special symbol
        #6, expecting right paren, comma or another edge label
        #7, after a comma: expecting one more target (node name or expression)

        def unexpected(token, pos):
            """Build the error for a token that is not allowed in this state."""
            return ParserError(
                "Unexpected token %s at position %i." % (token, pos))

        def strip_external(label):
            """Drop a leading '@' from label and record the node as external."""
            if label is not None and label[0] == '@':
                label = label[1:]
                amr.external_nodes.append(label)
            return label

        def set_concept(node, concept):
            """Record concept for node; None never overwrites an existing entry."""
            if concepts and (node not in amr.node_to_concepts
                             or concept is not None):
                amr.node_to_concepts[node] = concept

        def finish_node(label, concept):
            """Hand a closed node to its parent edge or make it a root.

            Returns the next parser state.
            """
            if stack:
                # Still inside another node: this one is an edge target.
                stack.append((CNODE, label, concept))
                return 6
            amr.roots.append(label)
            return 0

        def close_leaf(register_concept):
            """Close a node that has no outgoing edges; return next state."""
            kind, label, concept = stack.pop()
            assert kind == PNODE
            label = strip_external(label)
            # Original comment: "add only the node". The lookup is kept for
            # its side effect -- presumably Hgraph creates the entry on
            # access; TODO confirm against Hgraph.
            amr[label]
            if register_concept:
                set_concept(label, concept)
            return finish_node(label, concept)

        def close_edges():
            """Pop all edges of the innermost open node and add them to amr.

            Returns the next parser state.
            """
            edges = []
            while stack[-1][0] != PNODE:  # Pop all edges
                children = []
                # Pop all targets of this (hyper)edge. They come off the
                # stack in reverse input order and are stored that way.
                # NOTE(review): confirm the reversed target order is intended.
                while stack[-1][0] == CNODE:
                    _, child, childconcept = stack.pop()
                    children.append((strip_external(child), childconcept))

                assert stack[-1][0] == EDGE
                _, edgelabel = stack.pop()
                edges.append((edgelabel, children))

            _, parent, parentconcept = stack.pop()
            # Strip the '@' marker *before* recording the concept so that the
            # concept is keyed by the same label the node carries in the
            # graph (this is what the edge-less case and the children do).
            parent = strip_external(parent)
            set_concept(parent, parentconcept)

            for edgelabel, children in edges:
                hypertarget = []  # build hyperedge destination
                for node, concept in children:
                    if node is not None:  # None marks a unary edge
                        set_concept(node, concept)
                        hypertarget.append(node)

                if edgelabel[0] == '#':  # this is a nonterminal Edge
                    edgelabel = NonterminalLabel(edgelabel[1:])

                amr._add_triple(parent, edgelabel, tuple(hypertarget))

            return finish_node(parent, parentconcept)

        for toktype, token, pos in self.lexer.lex(s):

            if state == 0:
                if toktype == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

            elif state == 1:
                if toktype == LexTypes.IDENTIFIER:
                    stack.append((PNODE, token, None))  # Push source node
                    state = 2
                else:
                    raise unexpected(token, pos)

            elif state == 2:
                if toktype == LexTypes.SLASH:
                    state = 3
                elif toktype == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif toktype == LexTypes.RPAR:
                    # Node without concept and without edges.
                    state = close_leaf(False)
                else:
                    raise unexpected(token, pos)

            elif state == 3:
                if toktype == LexTypes.IDENTIFIER:
                    assert stack[-1][0] == PNODE
                    nodelabel = stack.pop()[1]
                    # Re-push the source node, now with its concept label
                    stack.append((PNODE, nodelabel, token))
                    state = 4
                else:
                    raise unexpected(token, pos)

            elif state == 4:
                if toktype == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif toktype == LexTypes.RPAR:
                    # Node with a concept but without edges.
                    state = close_leaf(True)
                else:
                    raise unexpected(token, pos)

            elif state == 5:
                if toktype == LexTypes.LPAR:
                    state = 1
                elif toktype == LexTypes.QUANTITY:
                    stack.append((CNODE, Quantity(token), None))
                    state = 6
                elif toktype == LexTypes.STRLITERAL:
                    # token[1:-1] drops the first and last character
                    stack.append((CNODE, StrLiteral(token[1:-1]), None))
                    state = 6
                elif toktype == LexTypes.LITERAL:
                    stack.append((CNODE, Literal(token[1:]), None))
                    state = 6
                elif toktype == LexTypes.IDENTIFIER:
                    # Push target node referenced by name (no concept)
                    stack.append((CNODE, token, None))
                    state = 6
                elif toktype == LexTypes.EDGELABEL:  # Unary edge
                    stack.append((CNODE, None, None))
                    stack.append((EDGE, token[1:]))
                    state = 5
                elif toktype == LexTypes.RPAR:  # Unary edge
                    stack.append((CNODE, None, None))
                    state = close_edges()
                else:
                    raise unexpected(token, pos)

            elif state == 6:
                if toktype == LexTypes.RPAR:  # Pop from stack and add edges
                    state = close_edges()
                elif toktype == LexTypes.COMMA:
                    state = 7
                elif toktype == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))
                    state = 5
                else:
                    raise unexpected(token, pos)

            elif state == 7:
                if toktype == LexTypes.IDENTIFIER:
                    # Push one more target node for the current hyperedge
                    stack.append((CNODE, token, None))
                    state = 6
                elif toktype == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

        return amr