Ejemplo n.º 1
0
    def parse_string(self, s, concepts=True):
        """
        Parse the string s and return a new hypergraph.

        Before parsing, every ``_<digits>`` index in s that is followed by
        a dot and preceded by a character other than ``:`` is added to
        ``self.reentrance_indexes`` (which must already exist; it is not
        created here).

        The tokens produced by ``self.lexer.lex(s)`` then drive a small
        finite-state parser. Open parent nodes, edge labels and child
        nodes are kept on an explicit stack until the closing parenthesis
        of their group is seen.

        Args:
            s: the graph description to parse.
            concepts: accepted for interface compatibility; not used by
                this method.

        Returns:
            A new Hgraph. Its external node ids are renumbered
            consecutively from 0, in the sort order of the ids returned
            by ``self.parse_node``.

        Raises:
            ParserError: on an unexpected token, including any token that
                follows the completed top-level graph, or when one node is
                given two different external node ids.
        """

        # Constants to identify items on the stack
        PNODE = 1  # Parent node
        CNODE = 2  # Child node
        EDGE = 3  # Hyperedge

        hgraph = Hgraph()

        stack = []
        state = 0

        self.id_count = 0
        self.nt_id_count = 0
        self.ext_id_count = 0
        self.seen_nodes = set()
        self.explicit_ext_ids = False

        # States of the finite state parser
        #0, top level
        #1, expecting head nodename
        #2, expecting edge label or node
        #3, expecting further child nodes or right paren
        #4, saw edge label; expecting child node, edge label or right paren
        #5, top-level graph complete; every further token is an error

        def unexpected(token, pos):
            # Build the error raised for a token the current state rejects.
            return ParserError(
                "Unexpected token %s at position %i." % (token, pos))

        def insert_node(node, root=False):
            # Insert a node, given as (ident, label, ext_id), into the graph.
            ident, label, ext_id = node
            hgraph[ident]  # Lookup only: initialize dictionary for this node
            hgraph.node_to_concepts[ident] = label
            if ext_id is not None:
                known = hgraph.external_nodes
                if ident in known and known[ident] != ext_id:
                    raise ParserError(
                        "Incompatible external node IDs for node %s." % ident)
                known[ident] = ext_id
                hgraph.rev_external_nodes[ext_id] = ident
            if root:
                hgraph.roots.append(ident)

        def pop_and_transition():
            # Create all edges in a group from the stack, attach them to the
            # graph and return the state the parser must move to.  The state
            # is returned rather than assigned here because an assignment in
            # this nested function would only bind a local name and leave
            # the parser's state untouched.
            edges = []
            while stack[-1][0] != PNODE:  # Pop all edges
                children = []
                while stack[-1][0] == CNODE:  # Pop all nodes in hyperedge
                    _, node = stack.pop()
                    insert_node(node)
                    children.append(node)
                # Internal invariant: child nodes always follow an edge.
                assert stack[-1][0] == EDGE
                _, edgelabel = stack.pop()
                edges.append((edgelabel, children))

            # Construct the hyperedges
            _, parentnode = stack.pop()
            parent_ident = parentnode[0]
            for edgelabel, children in edges:
                # Children were popped last-to-first; restore input order.
                hyperchild = tuple(
                    ident for ident, _label, _ext_id in reversed(children))

                if "$" in edgelabel:  # this is a nonterminal edge
                    new_edge = NonterminalLabel.from_string(edgelabel)
                    if not new_edge.index:
                        # No explicit index given: assign the next free one.
                        new_edge.index = "_%i" % self.nt_id_count
                        self.nt_id_count += 1
                else:
                    new_edge = edgelabel
                hgraph._add_triple(parent_ident, new_edge, hyperchild)

            if stack:
                # The finished subgraph is a child of an enclosing node.
                insert_node(parentnode)
                stack.append((CNODE, parentnode))
                return 4
            # Nothing left open: this was the top-level node.
            insert_node(parentnode, root=True)
            return 5

        # Collect reentrance indexes from the raw string before lexing.
        reentrance_pattern = re.compile(r'[^:](_[0-9]+)\.')
        self.reentrance_indexes.update(reentrance_pattern.findall(s))

        # Parser transitions start here
        for typ, token, pos in self.lexer.lex(s):

            if state == 0:
                if typ == LexTypes.LPAR:
                    state = 1
                elif typ == LexTypes.NODE:
                    insert_node(self.parse_node(token), root=True)
                    state = 5
                else:
                    raise unexpected(token, pos)

            elif state == 1:
                if typ != LexTypes.NODE:
                    raise unexpected(token, pos)
                stack.append((PNODE, self.parse_node(token)))  # Push head node
                state = 2

            elif state == 2:
                if typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))  # Drop the first character
                    state = 4
                elif typ == LexTypes.NODE:
                    stack.append((EDGE, ""))  # No edge specified, assume empty label
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.LPAR:
                    stack.append((EDGE, ""))  # No edge specified, assume empty label
                    state = 1
                elif typ == LexTypes.RPAR:
                    # Head node without any edges.
                    itemtype, node = stack.pop()
                    assert itemtype == PNODE
                    if stack:
                        insert_node(node)
                        stack.append((CNODE, node))
                        state = 3
                    else:
                        insert_node(node, root=True)
                        state = 5
                else:
                    raise unexpected(token, pos)

            elif state == 3 or state == 4:
                # Both states accept the same tokens with the same effects.
                if typ == LexTypes.RPAR:  # Pop from stack and add edges
                    state = pop_and_transition()
                elif typ == LexTypes.NODE:
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:]))  # Drop the first character
                    state = 4
                elif typ == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

            elif state == 5:
                raise unexpected(token, pos)

        # Normalize external nodes: renumber them 0..k-1 by their parsed id.
        new_ext_nodes = {}
        new_rev_ext_nodes = {}
        by_ext_id = sorted(hgraph.external_nodes.items(),
                           key=lambda item: item[1])
        for new_id, (node, _old_id) in enumerate(by_ext_id):
            new_ext_nodes[node] = new_id
            new_rev_ext_nodes[new_id] = node

        hgraph.external_nodes = new_ext_nodes
        hgraph.rev_external_nodes = new_rev_ext_nodes
        return hgraph
Ejemplo n.º 2
0
    def parse_string(self, s, concepts = True):
        """
        Parse the string s and return a new hypergraph.

        The tokens produced by ``self.lexer.lex(s)`` drive a small
        finite-state parser. Open parent nodes, edge labels and child
        nodes are kept on an explicit stack until the closing parenthesis
        of their group is seen.

        Args:
            s: the graph description to parse.
            concepts: accepted for interface compatibility; not used by
                this method.

        Returns:
            A new Hgraph. Its external node ids are renumbered
            consecutively from 0, in the sort order of the ids returned
            by ``self.parse_node``.

        Raises:
            ParserError: on an unexpected token, including any token that
                follows the completed top-level graph, or when one node is
                given two different external node ids.
        """

        # Constants to identify items on the stack
        PNODE = 1 # Parent node
        CNODE = 2 # Child node
        EDGE = 3  # Hyperedge

        hgraph = Hgraph()

        stack = []
        state = 0

        self.id_count = 0
        self.nt_id_count = 0
        self.ext_id_count = 0
        self.seen_nodes = set()
        self.explicit_ext_ids = False

        # States of the finite state parser
        #0, top level
        #1, expecting head nodename
        #2, expecting edge label or node
        #3, expecting further child nodes or right paren
        #4, saw edge label; expecting child node, edge label or right paren
        #5, top-level graph complete; every further token is an error

        def unexpected(token, pos):
            # Build the error raised for a token the current state rejects.
            return ParserError(
                "Unexpected token %s at position %i." % (token, pos))

        def insert_node(node, root=False):
            # Insert a node, given as (ident, label, ext_id), into the graph.
            ident, label, ext_id = node
            hgraph[ident] # Lookup only: initialize dictionary for this node
            hgraph.node_to_concepts[ident] = label
            if ext_id is not None:
                known = hgraph.external_nodes
                if ident in known and known[ident] != ext_id:
                    raise ParserError(
                        "Incompatible external node IDs for node %s." % ident)
                known[ident] = ext_id
                hgraph.rev_external_nodes[ext_id] = ident
            if root:
                hgraph.roots.append(ident)

        def pop_and_transition():
            # Create all edges in a group from the stack, attach them to the
            # graph and return the state the parser must move to.  The state
            # is returned rather than assigned here because an assignment in
            # this nested function would only bind a local name and leave
            # the parser's state untouched.
            edges = []
            while stack[-1][0] != PNODE: # Pop all edges
                children = []
                while stack[-1][0] == CNODE: # Pop all nodes in hyperedge
                    _, node = stack.pop()
                    insert_node(node)
                    children.append(node)
                # Internal invariant: child nodes always follow an edge.
                assert stack[-1][0] == EDGE
                _, edgelabel = stack.pop()
                edges.append((edgelabel, children))

            # Construct the hyperedges
            _, parentnode = stack.pop()
            parent_ident = parentnode[0]
            for edgelabel, children in edges:
                # Children were popped last-to-first; restore input order.
                hyperchild = tuple(
                    ident for ident, _label, _ext_id in reversed(children))

                if "$" in edgelabel: # this is a nonterminal edge
                    new_edge = NonterminalLabel.from_string(edgelabel)
                    if not new_edge.index:
                        # No explicit index given: assign the next free one.
                        new_edge.index = "_%i" % self.nt_id_count
                        self.nt_id_count += 1
                else:
                    new_edge = edgelabel
                hgraph._add_triple(parent_ident, new_edge, hyperchild)

            if stack:
                # The finished subgraph is a child of an enclosing node.
                insert_node(parentnode)
                stack.append((CNODE, parentnode))
                return 4
            # Nothing left open: this was the top-level node.
            insert_node(parentnode, root = True)
            return 5

        # Parser transitions start here
        for typ, token, pos in self.lexer.lex(s):

            if state == 0:
                if typ == LexTypes.LPAR:
                    state = 1
                elif typ == LexTypes.NODE:
                    insert_node(self.parse_node(token), root=True)
                    state = 5
                else:
                    raise unexpected(token, pos)

            elif state == 1:
                if typ != LexTypes.NODE:
                    raise unexpected(token, pos)
                stack.append((PNODE, self.parse_node(token))) # Push head node
                state = 2

            elif state == 2:
                if typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:])) # Drop the first character
                    state = 4
                elif typ == LexTypes.NODE:
                    stack.append((EDGE, "")) # No edge specified, assume empty label
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.LPAR:
                    stack.append((EDGE, "")) # No edge specified, assume empty label
                    state = 1
                elif typ == LexTypes.RPAR:
                    # Head node without any edges.
                    itemtype, node = stack.pop()
                    assert itemtype == PNODE
                    if stack:
                        insert_node(node)
                        stack.append((CNODE, node))
                        state = 3
                    else:
                        insert_node(node, root = True)
                        state = 5
                else:
                    raise unexpected(token, pos)

            elif state == 3 or state == 4:
                # Both states accept the same tokens with the same effects.
                if typ == LexTypes.RPAR: # Pop from stack and add edges
                    state = pop_and_transition()
                elif typ == LexTypes.NODE:
                    stack.append((CNODE, self.parse_node(token)))
                    state = 3
                elif typ == LexTypes.EDGELABEL:
                    stack.append((EDGE, token[1:])) # Drop the first character
                    state = 4
                elif typ == LexTypes.LPAR:
                    state = 1
                else:
                    raise unexpected(token, pos)

            elif state == 5:
                raise unexpected(token, pos)

        # Normalize external nodes: renumber them 0..k-1 by their parsed id.
        new_ext_nodes = {}
        new_rev_ext_nodes = {}
        by_ext_id = sorted(hgraph.external_nodes.items(),
                           key = lambda item: item[1])
        for new_id, (node, _old_id) in enumerate(by_ext_id):
            new_ext_nodes[node] = new_id
            new_rev_ext_nodes[new_id] = node

        hgraph.external_nodes = new_ext_nodes
        hgraph.rev_external_nodes = new_rev_ext_nodes
        return hgraph