def make_lexing_code(self): from pypy.rlib.parsing.codebuilder import Codebuilder result = Codebuilder() result.start_block("def recognize(runner, i):") result.emit("#auto-generated code, don't edit") result.emit("assert i >= 0") result.emit("input = runner.text") result.emit("state = 0") result.start_block("while 1:") state_to_chars = {} for (state, char), nextstate in self.transitions.iteritems(): state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char) state_to_chars_sorted = state_to_chars.items() state_to_chars_sorted.sort() above = set() for state, nextstates in state_to_chars_sorted: above.add(state) for _ in result.start_block("if state == %s:" % (state, )): if state in self.final_states: result.emit("runner.last_matched_index = i - 1") result.emit("runner.last_matched_state = state") for _ in result.start_block("try:"): result.emit("char = input[i]") result.emit("i += 1") for _ in result.start_block("except IndexError:"): result.emit("runner.state = %s" % (state, )) if state in self.final_states: result.emit("return i") else: result.emit("return ~i") elif_prefix = "" for nextstate, chars in nextstates.iteritems(): final = nextstate in self.final_states compressed = compress_char_set(chars) if nextstate in above: continue_prefix = "continue" else: continue_prefix = "" for i, (a, num) in enumerate(compressed): if num < 3: for charord in range(ord(a), ord(a) + num): for _ in result.start_block("%sif char == %r:" % (elif_prefix, chr(charord))): result.emit("state = %s" % (nextstate, )) result.emit(continue_prefix) if not elif_prefix: elif_prefix = "el" else: for _ in result.start_block( "%sif %r <= char <= %r:" % ( elif_prefix, a, chr(ord(a) + num - 1))): result.emit("state = %s" % (nextstate, )) result.emit(continue_prefix) if not elif_prefix: elif_prefix = "el" for _ in result.start_block("else:"): result.emit("break") #print state_to_chars.keys() for state in range(self.num_states): if state in state_to_chars: continue assert state in self.final_states result.emit(""" runner.last_matched_state = state runner.last_matched_index = i - 1 runner.state = state if i == len(input): return i else: return ~i break""") result.end_block("while") result.emit(""" runner.state = state return ~i""") result.end_block("def") result = result.get_code() while "\n\n" in result: result = result.replace("\n\n", "\n") #print result exec py.code.Source(result).compile() return recognize
def make_lexing_code(self): from pypy.rlib.parsing.codebuilder import Codebuilder result = Codebuilder() result.start_block("def recognize(runner, i):") result.emit("#auto-generated code, don't edit") result.emit("assert i >= 0") result.emit("input = runner.text") result.emit("state = 0") result.start_block("while 1:") state_to_chars = {} for (state, char), nextstate in self.transitions.iteritems(): state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char) state_to_chars_sorted = state_to_chars.items() state_to_chars_sorted.sort() above = set() for state, nextstates in state_to_chars_sorted: above.add(state) for _ in result.start_block("if state == %s:" % (state, )): if state in self.final_states: result.emit("runner.last_matched_index = i - 1") result.emit("runner.last_matched_state = state") for _ in result.start_block("try:"): result.emit("char = input[i]") result.emit("i += 1") for _ in result.start_block("except IndexError:"): result.emit("runner.state = %s" % (state, )) if state in self.final_states: result.emit("return i") else: result.emit("return ~i") elif_prefix = "" for nextstate, chars in nextstates.iteritems(): final = nextstate in self.final_states compressed = compress_char_set(chars) if nextstate in above: continue_prefix = "continue" else: continue_prefix = "" for i, (a, num) in enumerate(compressed): if num < 3: for charord in range(ord(a), ord(a) + num): for _ in result.start_block( "%sif char == %r:" % (elif_prefix, chr(charord))): result.emit("state = %s" % (nextstate, )) result.emit(continue_prefix) if not elif_prefix: elif_prefix = "el" else: for _ in result.start_block( "%sif %r <= char <= %r:" % (elif_prefix, a, chr(ord(a) + num - 1))): result.emit("state = %s" % (nextstate, )) result.emit(continue_prefix) if not elif_prefix: elif_prefix = "el" for _ in result.start_block("else:"): result.emit("break") #print state_to_chars.keys() for state in range(self.num_states): if state in state_to_chars: continue assert state in self.final_states result.emit(""" runner.last_matched_state = state runner.last_matched_index = i - 1 runner.state = state if i == len(input): return i else: return ~i break""") result.end_block("while") result.emit(""" runner.state = state return ~i""") result.end_block("def") result = result.get_code() while "\n\n" in result: result = result.replace("\n\n", "\n") #print result exec py.code.Source(result).compile() return recognize
def make_code(self): from pypy.rlib.parsing.codebuilder import Codebuilder result = Codebuilder() result.start_block("def recognize(input):") result.emit("i = 0") result.emit("state = 0") result.start_block("while 1:") # state_to_chars is a dict containing the sets of # Ex: state_to_chars = { 0: set('a','b','c'), ...} state_to_chars = {} for (state, char), nextstate in self.transitions.iteritems(): state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char) above = set() for state, nextstates in state_to_chars.iteritems(): above.add(state) for _ in result.start_block("if state == %s:" % (state, )): for _ in result.start_block("if i < len(input):"): result.emit("char = input[i]") result.emit("i += 1") for _ in result.start_block("else:"): if state in self.final_states: result.emit("return True") else: result.emit("break") elif_prefix = "" for nextstate, chars in nextstates.iteritems(): final = nextstate in self.final_states compressed = compress_char_set(chars) if nextstate in above: continue_prefix = "continue" else: continue_prefix = "" for i, (a, num) in enumerate(compressed): if num < 5: for charord in range(ord(a), ord(a) + num): for _ in result.start_block( "%sif char == %r:" % ( elif_prefix, chr(charord))): result.emit("state = %s" % (nextstate, )) result.emit(continue_prefix) if not elif_prefix: elif_prefix = "el" else: for _ in result.start_block( "%sif %r <= char <= %r:" % ( elif_prefix, a, chr(ord(a) + num - 1))): result.emit("state = %s""" % (nextstate, )) result.emit(continue_prefix) if not elif_prefix: elif_prefix = "el" for _ in result.start_block("else:"): result.emit("break") #print state_to_chars.keys() for state in range(self.num_states): if state in state_to_chars: continue for _ in result.start_block("if state == %s:" % (state, )): for _ in result.start_block("if i == len(input):"): result.emit("return True") for _ in result.start_block("else:"): result.emit("break") result.emit("break") result.end_block("while") result.emit("raise LexerError(input, state, i)") result.end_block("def") result = result.get_code() while "\n\n" in result: result = result.replace("\n\n", "\n") #print result d = {'LexerError': LexerError} exec py.code.Source(result).compile() in d return d['recognize']
def make_code(self): from pypy.rlib.parsing.codebuilder import Codebuilder result = Codebuilder() result.start_block("def recognize(input):") result.emit("i = 0") result.emit("state = 0") result.start_block("while 1:") # state_to_chars is a dict containing the sets of # Ex: state_to_chars = { 0: set('a','b','c'), ...} state_to_chars = {} for (state, char), nextstate in self.transitions.iteritems(): state_to_chars.setdefault(state, {}).setdefault(nextstate, set()).add(char) above = set() for state, nextstates in state_to_chars.iteritems(): above.add(state) for _ in result.start_block("if state == %s:" % (state, )): for _ in result.start_block("if i < len(input):"): result.emit("char = input[i]") result.emit("i += 1") for _ in result.start_block("else:"): if state in self.final_states: result.emit("return True") else: result.emit("break") elif_prefix = "" for nextstate, chars in nextstates.iteritems(): final = nextstate in self.final_states compressed = compress_char_set(chars) if nextstate in above: continue_prefix = "continue" else: continue_prefix = "" for i, (a, num) in enumerate(compressed): if num < 5: for charord in range(ord(a), ord(a) + num): for _ in result.start_block( "%sif char == %r:" % (elif_prefix, chr(charord))): result.emit("state = %s" % (nextstate, )) result.emit(continue_prefix) if not elif_prefix: elif_prefix = "el" else: for _ in result.start_block( "%sif %r <= char <= %r:" % (elif_prefix, a, chr(ord(a) + num - 1))): result.emit("state = %s" "" % (nextstate, )) result.emit(continue_prefix) if not elif_prefix: elif_prefix = "el" for _ in result.start_block("else:"): result.emit("break") #print state_to_chars.keys() for state in range(self.num_states): if state in state_to_chars: continue for _ in result.start_block("if state == %s:" % (state, )): for _ in result.start_block("if i == len(input):"): result.emit("return True") for _ in result.start_block("else:"): result.emit("break") result.emit("break") result.end_block("while") result.emit("raise LexerError(input, state, i)") result.end_block("def") result = result.get_code() while "\n\n" in result: result = result.replace("\n\n", "\n") #print result d = {'LexerError': LexerError} exec py.code.Source(result).compile() in d return d['recognize']