Beispiel #1
0
    def make_lexing_code(self):
        from pypy.rlib.parsing.codebuilder import Codebuilder
        result = Codebuilder()
        result.start_block("def recognize(runner, i):")
        result.emit("#auto-generated code, don't edit")
        result.emit("assert i >= 0")
        result.emit("input = runner.text")
        result.emit("state = 0")
        result.start_block("while 1:")
        state_to_chars = {}
        for (state, char), nextstate in self.transitions.iteritems():
            state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char)
        state_to_chars_sorted = state_to_chars.items()
        state_to_chars_sorted.sort()
        above = set()
        for state, nextstates in state_to_chars_sorted:
            above.add(state)
            for _ in result.start_block("if state == %s:" % (state, )):
                if state in self.final_states:
                    result.emit("runner.last_matched_index = i - 1")
                    result.emit("runner.last_matched_state = state")
                for _ in result.start_block("try:"):
                    result.emit("char = input[i]")
                    result.emit("i += 1")
                for _ in result.start_block("except IndexError:"):
                    result.emit("runner.state = %s" % (state, ))
                    if state in self.final_states:
                        result.emit("return i")
                    else:
                        result.emit("return ~i")
                elif_prefix = ""
                for nextstate, chars in nextstates.iteritems():
                    final = nextstate in self.final_states
                    compressed = compress_char_set(chars)
                    if nextstate in above:
                        continue_prefix = "continue"
                    else:
                        continue_prefix = ""
                    for i, (a, num) in enumerate(compressed):
                        if num < 3:
                            for charord in range(ord(a), ord(a) + num):
                                for _ in result.start_block("%sif char == %r:"
                                        % (elif_prefix, chr(charord))):
                                    result.emit("state = %s" % (nextstate, ))
                                    result.emit(continue_prefix)
                                if not elif_prefix:
                                    elif_prefix = "el"
                        else:
                            for _ in result.start_block(
                                "%sif %r <= char <= %r:" % (
                                    elif_prefix, a, chr(ord(a) + num - 1))):
                                    result.emit("state = %s" % (nextstate, ))
                                    result.emit(continue_prefix)
                            if not elif_prefix:
                                elif_prefix = "el"
                for _ in result.start_block("else:"):
                    result.emit("break")
        #print state_to_chars.keys()
        for state in range(self.num_states):
            if state in state_to_chars:
                continue
            assert state in self.final_states
        result.emit("""
runner.last_matched_state = state
runner.last_matched_index = i - 1
runner.state = state
if i == len(input):
    return i
else:
    return ~i
break""")
        result.end_block("while")
        result.emit("""
runner.state = state
return ~i""")
        result.end_block("def")
        result = result.get_code()
        while "\n\n" in result:
            result = result.replace("\n\n", "\n")
        #print result
        exec py.code.Source(result).compile()
        return recognize
Beispiel #2
0
 def make_code(self):
     from pypy.rlib.parsing.codebuilder import Codebuilder
     result = Codebuilder()
     result.start_block("def recognize(input):")
     result.emit("i = 0")
     result.emit("state = 0")
     result.start_block("while 1:")
     
     # state_to_chars is a dict containing the sets of 
     #   Ex: state_to_chars = { 0: set('a','b','c'), ...}
     state_to_chars = {}
     for (state, char), nextstate in self.transitions.iteritems():
         state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char)
     above = set()
     for state, nextstates in state_to_chars.iteritems():
         above.add(state)
         for _ in result.start_block("if state == %s:" % (state, )):
             for _ in result.start_block("if i < len(input):"):
                 result.emit("char = input[i]")
                 result.emit("i += 1")
             for _ in result.start_block("else:"):
                 if state in self.final_states:
                     result.emit("return True")
                 else:
                     result.emit("break")
             elif_prefix = ""
             for nextstate, chars in nextstates.iteritems():
                 final = nextstate in self.final_states
                 compressed = compress_char_set(chars)
                 if nextstate in above:
                     continue_prefix = "continue"
                 else:
                     continue_prefix = ""
                 for i, (a, num) in enumerate(compressed):
                     if num < 5:
                         for charord in range(ord(a), ord(a) + num):
                             for _ in result.start_block(
                                 "%sif char == %r:" % (
                                     elif_prefix, chr(charord))):
                                 result.emit("state = %s" % (nextstate, ))
                                 result.emit(continue_prefix)
                             if not elif_prefix:
                                 elif_prefix = "el"
                     else:
                         for _ in result.start_block(
                             "%sif %r <= char <= %r:" % (
                                 elif_prefix, a, chr(ord(a) + num - 1))):
                             result.emit("state = %s""" % (nextstate, ))
                             result.emit(continue_prefix)
                         if not elif_prefix:
                             elif_prefix = "el"
             for _ in result.start_block("else:"):
                 result.emit("break") 
     #print state_to_chars.keys()
     for state in range(self.num_states):
         if state in state_to_chars:
             continue
         for _ in result.start_block("if state == %s:" % (state, )):
             for _ in result.start_block("if i == len(input):"):
                 result.emit("return True")
             for _ in result.start_block("else:"):
                 result.emit("break")
     result.emit("break")
     result.end_block("while")
     result.emit("raise LexerError(input, state, i)")
     result.end_block("def")
     result = result.get_code()
     while "\n\n" in result:
         result = result.replace("\n\n", "\n")
     #print result
     d = {'LexerError': LexerError}
     exec py.code.Source(result).compile() in d
     return d['recognize']
Beispiel #3
0
    def make_lexing_code(self):
        from pypy.rlib.parsing.codebuilder import Codebuilder
        result = Codebuilder()
        result.start_block("def recognize(runner, i):")
        result.emit("#auto-generated code, don't edit")
        result.emit("assert i >= 0")
        result.emit("input = runner.text")
        result.emit("state = 0")
        result.start_block("while 1:")
        state_to_chars = {}
        for (state, char), nextstate in self.transitions.iteritems():
            state_to_chars.setdefault(state, {}).setdefault(nextstate,
                                                            set()).add(char)
        state_to_chars_sorted = state_to_chars.items()
        state_to_chars_sorted.sort()
        above = set()
        for state, nextstates in state_to_chars_sorted:
            above.add(state)
            for _ in result.start_block("if state == %s:" % (state, )):
                if state in self.final_states:
                    result.emit("runner.last_matched_index = i - 1")
                    result.emit("runner.last_matched_state = state")
                for _ in result.start_block("try:"):
                    result.emit("char = input[i]")
                    result.emit("i += 1")
                for _ in result.start_block("except IndexError:"):
                    result.emit("runner.state = %s" % (state, ))
                    if state in self.final_states:
                        result.emit("return i")
                    else:
                        result.emit("return ~i")
                elif_prefix = ""
                for nextstate, chars in nextstates.iteritems():
                    final = nextstate in self.final_states
                    compressed = compress_char_set(chars)
                    if nextstate in above:
                        continue_prefix = "continue"
                    else:
                        continue_prefix = ""
                    for i, (a, num) in enumerate(compressed):
                        if num < 3:
                            for charord in range(ord(a), ord(a) + num):
                                for _ in result.start_block(
                                        "%sif char == %r:" %
                                    (elif_prefix, chr(charord))):
                                    result.emit("state = %s" % (nextstate, ))
                                    result.emit(continue_prefix)
                                if not elif_prefix:
                                    elif_prefix = "el"
                        else:
                            for _ in result.start_block(
                                    "%sif %r <= char <= %r:" %
                                (elif_prefix, a, chr(ord(a) + num - 1))):
                                result.emit("state = %s" % (nextstate, ))
                                result.emit(continue_prefix)
                            if not elif_prefix:
                                elif_prefix = "el"
                for _ in result.start_block("else:"):
                    result.emit("break")
        #print state_to_chars.keys()
        for state in range(self.num_states):
            if state in state_to_chars:
                continue
            assert state in self.final_states
        result.emit("""
runner.last_matched_state = state
runner.last_matched_index = i - 1
runner.state = state
if i == len(input):
    return i
else:
    return ~i
break""")
        result.end_block("while")
        result.emit("""
runner.state = state
return ~i""")
        result.end_block("def")
        result = result.get_code()
        while "\n\n" in result:
            result = result.replace("\n\n", "\n")
        #print result
        exec py.code.Source(result).compile()
        return recognize
Beispiel #4
0
    def make_code(self):
        from pypy.rlib.parsing.codebuilder import Codebuilder
        result = Codebuilder()
        result.start_block("def recognize(input):")
        result.emit("i = 0")
        result.emit("state = 0")
        result.start_block("while 1:")

        # state_to_chars is a dict containing the sets of
        #   Ex: state_to_chars = { 0: set('a','b','c'), ...}
        state_to_chars = {}
        for (state, char), nextstate in self.transitions.iteritems():
            state_to_chars.setdefault(state, {}).setdefault(nextstate,
                                                            set()).add(char)
        above = set()
        for state, nextstates in state_to_chars.iteritems():
            above.add(state)
            for _ in result.start_block("if state == %s:" % (state, )):
                for _ in result.start_block("if i < len(input):"):
                    result.emit("char = input[i]")
                    result.emit("i += 1")
                for _ in result.start_block("else:"):
                    if state in self.final_states:
                        result.emit("return True")
                    else:
                        result.emit("break")
                elif_prefix = ""
                for nextstate, chars in nextstates.iteritems():
                    final = nextstate in self.final_states
                    compressed = compress_char_set(chars)
                    if nextstate in above:
                        continue_prefix = "continue"
                    else:
                        continue_prefix = ""
                    for i, (a, num) in enumerate(compressed):
                        if num < 5:
                            for charord in range(ord(a), ord(a) + num):
                                for _ in result.start_block(
                                        "%sif char == %r:" %
                                    (elif_prefix, chr(charord))):
                                    result.emit("state = %s" % (nextstate, ))
                                    result.emit(continue_prefix)
                                if not elif_prefix:
                                    elif_prefix = "el"
                        else:
                            for _ in result.start_block(
                                    "%sif %r <= char <= %r:" %
                                (elif_prefix, a, chr(ord(a) + num - 1))):
                                result.emit("state = %s" "" % (nextstate, ))
                                result.emit(continue_prefix)
                            if not elif_prefix:
                                elif_prefix = "el"
                for _ in result.start_block("else:"):
                    result.emit("break")
        #print state_to_chars.keys()
        for state in range(self.num_states):
            if state in state_to_chars:
                continue
            for _ in result.start_block("if state == %s:" % (state, )):
                for _ in result.start_block("if i == len(input):"):
                    result.emit("return True")
                for _ in result.start_block("else:"):
                    result.emit("break")
        result.emit("break")
        result.end_block("while")
        result.emit("raise LexerError(input, state, i)")
        result.end_block("def")
        result = result.get_code()
        while "\n\n" in result:
            result = result.replace("\n\n", "\n")
        #print result
        d = {'LexerError': LexerError}
        exec py.code.Source(result).compile() in d
        return d['recognize']
Beispiel #5
0
 def __init__(self):
     """Run the Codebuilder initialiser, then create empty containers."""
     Codebuilder.__init__(self)
     # Fresh per-instance containers; how they get filled is not visible
     # from this method.
     self.matchers = {}
     self.names = {}
     self.initcode = []
Beispiel #6
0
 def __init__(self):
     """Initialise the Codebuilder base, then set up empty state."""
     Codebuilder.__init__(self)
     # Each instance starts with two empty dicts and an empty list; their
     # later use is not visible from this method.
     self.names = {}
     self.matchers = {}
     self.initcode = []