def next(self):
    """Return the next token, using the rule sets of the current state.

    Ignorable input is skipped first: every rule in the current state's
    ``ignore_rules`` is tried at ``self.idx`` and, on a match, the position
    is updated and the rule's state transition is applied.  This repeats
    until no ignore rule matches.  Then the first rule in the current
    state's ``rules`` that matches produces the token.

    Raises:
        StopIteration: ``self.idx`` is at or past the end of ``self.s``.
        LexingError: no rule of the current state matches at ``self.idx``.
    """
    # Phase 1: consume everything the current state says to ignore.
    # NOTE(review): termination relies on _update_pos advancing self.idx.
    skipped_something = True
    while skipped_something:
        if self.idx >= len(self.s):
            raise StopIteration
        skipped_something = False
        for ignored in self._get_current_state().ignore_rules:
            hit = ignored.matches(self.s, self.idx)
            if hit:
                self._update_pos(hit)
                self._make_transition(ignored)
                skipped_something = True
                break

    # Phase 2: the first matching rule wins.
    for rule in self._get_current_state().rules:
        hit = rule.matches(self.s, self.idx)
        if not hit:
            continue
        # The line number is read before _update_pos runs; the column is
        # whatever _update_pos returns.
        start_line = self._lineno
        column = self._update_pos(hit)
        where = SourcePosition(hit.start, start_line, column)
        token = Token(rule.name, self.s[hit.start:hit.end], where)
        self._make_transition(rule)
        return token

    raise LexingError(None, SourcePosition(self.idx, -1, -1))
def next(self):
    """Return the next token from ``self.s``, skipping ignorable input.

    Rules in ``self.lexer.ignore_rules`` are applied repeatedly at
    ``self.idx`` until none matches; then the first matching rule in
    ``self.lexer.rules`` yields the token.  A match by the rule named
    ``"MISMATCH"`` is reported as an error instead of a token.

    Raises:
        StopIteration: ``self.idx`` is at or past the end of ``self.s``.
        LexingError: the ``"MISMATCH"`` rule matched, or no rule matched at
            all (previously the latter fell through and returned ``None``,
            which silently ended the token stream).
    """
    # NOTE(review): termination relies on _update_pos advancing self.idx.
    while True:
        if self.idx >= len(self.s):
            raise StopIteration
        for rule in self.lexer.ignore_rules:
            match = rule.matches(self.s, self.idx)
            if match:
                self._update_pos(match)
                break
        else:
            # No ignore rule matched: real input starts here.
            break

    for rule in self.lexer.rules:
        match = rule.matches(self.s, self.idx)
        if not match:
            continue
        # Line number is read before _update_pos; the column is its result.
        lineno = self._lineno
        self._colno = self._update_pos(match)
        text = self.s[match.start:match.end]
        if rule.name == "MISMATCH":
            # NOTE(review): this position uses self.idx / self._lineno as
            # they are *after* _update_pos ran - confirm that is intended.
            raise LexingError(
                "%r unexpected" % text,
                SourcePosition(self.idx, self._lineno, self._colno))
        return Token(rule.name, text,
                     SourcePosition(match.start, lineno, self._colno))

    # Nothing matched: fail loudly, like the other lexer variants do.
    raise LexingError(None, SourcePosition(self.idx, -1, -1))
def 取源码位置(片段):
    """Return a ``SourcePosition`` for a token, an AST node, or a list.

    A non-empty list is represented by its first element.  A ``Token``
    reports its own position via ``getsourcepos()``; an ``ast.stmt`` or
    ``ast.expr`` is mapped from its ``lineno`` / ``col_offset`` with index
    0.  Anything else yields ``SourcePosition(0, 0, 0)``.
    """
    if isinstance(片段, list) and len(片段) > 0:
        片段 = 片段[0]

    if isinstance(片段, Token):
        return 片段.getsourcepos()

    # Constant is also an ast.expr, so this check covers it too.
    if isinstance(片段, (ast.stmt, ast.expr)):
        # TODO: earlier, when SourcePosition had not been imported, neither
        # compiling nor running reported an error! This needs to be fixed.
        return SourcePosition(0, 片段.lineno, 片段.col_offset)

    return SourcePosition(0, 0, 0)
def next_literal_mode(self):
    # "literal" mode, i.e. outside "<?php ?>" tags: emits one
    # B_LITERAL_BLOCK token holding the text from self.startindex up to
    # the next "<?" opener (or to the end of the source when there is
    # none), then restarts the underlying lexer just past that opener.
    self.mode = MODE_PHPCODE  # default; a "<?=" opener overrides it below
    text = self.source
    begin = self.startindex
    assert begin >= 0

    opener = text.find('<?', begin)
    if opener == -1:
        opener = len(text)
    assert opener >= 0

    literal = text[begin:opener]  # may be empty
    literal_pos = SourcePosition(begin, self.startlineno + 1, 0)
    tok = self.lexer.token_class('B_LITERAL_BLOCK', literal, literal_pos)
    # Line breaks inside the literal text count towards the running line.
    self.startlineno += literal.count('\n')

    # Work out how many characters the opening tag itself occupies.
    if text[opener:opener + 5].lower() == '<?php':
        resume = opener + 5
    elif text[opener:opener + 3] == '<?=':
        resume = opener + 3
        self.mode = MODE_EQUALSIGN
    else:
        resume = opener + 2
    self.lexer.input(text, resume, self.startlineno)
    return tok
def next(self):
    """Return the next token from ``self.s``, or ``None`` at end of input.

    Rules in ``self.lexer.ignore_rules`` are applied repeatedly at
    ``self.idx`` until none matches; then the first matching rule in
    ``self.lexer.rules`` yields the token and ``self.idx`` moves to the end
    of the match.

    Ignored input is skipped with a loop rather than by recursing once per
    ignored match, so a long run of ignored matches can no longer exhaust
    the interpreter's recursion limit.

    Raises:
        LexingError: no rule matches at ``self.idx``.
    """
    # NOTE(review): an ignore rule whose match does not move ``end`` past
    # ``self.idx`` would never make progress here - confirm rules cannot
    # match the empty string.
    while True:
        if self.idx >= len(self.s):
            return None
        for rule in self.lexer.ignore_rules:
            match = rule.matches(self.s, self.idx)
            if match:
                self.idx = match.end
                break
        else:
            # No ignore rule matched: real input starts here.
            break

    for rule in self.lexer.rules:
        match = rule.matches(self.s, self.idx)
        if match:
            # TODO: lineno and colno
            source_pos = SourcePosition(match.start, -1, -1)
            token = Token(
                rule.name, self.s[match.start:match.end], source_pos)
            self.idx = match.end
            return token

    raise LexingError(None, SourcePosition(self.idx, -1, -1))
def next(self):
    """Return the next token from ``self.s``, skipping ignorable input.

    Rules in ``self.lexer.ignore_rules`` are applied repeatedly at
    ``self.idx`` (updating the position through ``_update_pos``) until none
    matches; then the first matching rule in ``self.lexer.rules`` yields
    the token.

    Ignored input is skipped with a loop rather than by recursing once per
    ignored match, so a long run of ignored matches can no longer exhaust
    the interpreter's recursion limit.

    Raises:
        StopIteration: ``self.idx`` is at or past the end of ``self.s``.
        LexingError: no rule matches at ``self.idx``.
    """
    # NOTE(review): termination relies on _update_pos advancing self.idx.
    while True:
        if self.idx >= len(self.s):
            raise StopIteration
        for rule in self.lexer.ignore_rules:
            match = rule.matches(self.s, self.idx)
            if match:
                self._update_pos(match)
                break
        else:
            # No ignore rule matched: real input starts here.
            break

    for rule in self.lexer.rules:
        match = rule.matches(self.s, self.idx)
        if match:
            # Line number is read before _update_pos; the column is the
            # value _update_pos returns.
            lineno = self._lineno
            colno = self._update_pos(match)
            source_pos = SourcePosition(match.start, lineno, colno)
            return Token(
                rule.name, self.s[match.start:match.end], source_pos)

    raise LexingError(None, SourcePosition(self.idx, -1, -1))
def __get_position__(self, cursor):
    """Build the ``SourcePosition`` for offset ``cursor`` in ``self.s``.

    The line number is one more than the count of newlines before
    ``cursor``; the column is the 1-based distance from the last newline
    before ``cursor`` (or from the start of the text on the first line).
    Both are asserted to be positive.
    """
    newlines_before = self.s.count("\n", 0, cursor)
    lineno = newlines_before + 1
    if newlines_before:
        # Measure from the character following the preceding newline.
        line_start = self.s.rfind("\n", 0, cursor) + 1
    else:
        line_start = 0
    colno = cursor + 1 - line_start
    sp = SourcePosition(cursor, lineno, colno)
    assert colno > 0
    assert lineno > 0
    return sp
def end_current_block(self, tok, endpos):
    # a "?>" marker that ends the current block of code
    # generates a ";" token followed by a B_LITERAL_BLOCK:
    # this returns the ";" token and switches to MODE_LITERAL, with the
    # literal text starting right after endpos.
    lineno = tok.source_pos.lineno
    resume = endpos + 1
    source = self.source
    # consume \n if immediately following
    # NOTE: the start line is left at the token's line here; the
    # corresponding "self.startlineno += 1" was commented out upstream.
    if resume < len(source) and source[resume] == '\n':
        resume += 1
    self.startlineno = lineno
    self.startindex = resume
    self.mode = MODE_LITERAL
    semicolon_pos = SourcePosition(endpos, lineno, 0)
    return self.lexer.token_class(";", ";", semicolon_pos)
def next(self):
    """Return the next token, mapping reserved words to their own names.

    Ignorable input is skipped first (rules in ``self.lexer.ignore_rules``
    are applied at ``self.idx`` until none matches).  The first matching
    rule in ``self.lexer.rules`` then yields the token; its name is looked
    up in ``self.lexer.reserved_dict`` by the matched text, falling back to
    the rule's name.

    Raises:
        StopIteration: ``self.idx`` is at or past the end of ``self.s``.
        LexingError: no rule matches at ``self.idx``.
    """
    # NOTE(review): termination relies on _update_pos advancing self.idx.
    while True:
        if self.idx >= len(self.s):
            raise StopIteration
        skipped = None
        for ignored in self.lexer.ignore_rules:
            skipped = ignored.matches(self.s, self.idx)
            if skipped:
                break
        if not skipped:
            break
        self._update_pos(skipped)

    for rule in self.lexer.rules:
        found = rule.matches(self.s, self.idx)
        if not found:
            continue
        # Line number is read before _update_pos; the column is its result.
        line = self._lineno
        column = self._update_pos(found)
        position = SourcePosition(found.start, line, column)
        lexeme = self.s[found.start:found.end]
        kind = self.lexer.reserved_dict.get(lexeme, rule.name)
        return Token(kind, lexeme, position)

    raise LexingError(None, SourcePosition(self.idx, -1, -1))
def token(self):
    """Return the next token from ``self.buf``, or ``None`` at end of input.

    Each ``(rule, token_type)`` pair in ``self.rules`` is tried at
    ``self.pos``; the first match produces a token positioned at the
    current offset and line (column 0) and moves ``self.pos`` to the end of
    the match.  An ``"END_OF_LINE"`` token also bumps ``self.lineno``.

    Raises:
        IniLexerError: no rule matches at ``self.pos``; it is given the
            current line number.
    """
    start = self.pos
    if start >= len(self.buf):
        return None

    for pattern, kind in self.rules:
        m = pattern.match(self.buf, pos=start)
        if not m:
            continue
        stop = m.end()
        assert stop >= 0
        text = self.buf[start:stop]
        # The position records the line the token starts on, so it is
        # built before the line counter is advanced.
        tok = Token(kind, text, SourcePosition(start, self.lineno, 0))
        if kind == "END_OF_LINE":
            self.lineno += 1
        self.pos = stop
        return tok

    raise IniLexerError(self.lineno)
def test_parse_error(self):
    """A parse failure reports the source position of the offending token."""
    generator = ParserGenerator(["VALUE"])

    @generator.production("main : VALUE")
    def main(p):
        return p[0]

    parser = generator.build()
    # The grammar accepts a single VALUE, so the second token is unexpected.
    tokens = [
        Token("VALUE", "hello"),
        Token("VALUE", "world", SourcePosition(5, 10, 2)),
    ]
    with py.test.raises(ParsingError) as exc_info:
        parser.parse(FakeLexer(tokens))

    reported = exc_info.value.getsourcepos()
    assert reported.lineno == 10
def next(self):
    """Return the next token from ``self.s``, skipping ignorable input.

    Rules in ``self.lexer.ignore_rules`` are applied repeatedly at
    ``self.idx`` until none matches; then the first matching rule in
    ``self.lexer.rules`` yields the token, positioned via
    ``__get_position__`` at the start of the match, and ``self.idx`` moves
    to the end of the match.

    Ignored input is skipped with a loop rather than by recursing once per
    ignored match, so a long run of ignored matches can no longer exhaust
    the interpreter's recursion limit.

    Raises:
        StopIteration: ``self.idx`` is at or past the end of ``self.s``.
        LexingError: no rule matches at ``self.idx``.
    """
    # NOTE(review): an ignore rule whose match does not move ``end`` past
    # ``self.idx`` would never make progress here - confirm rules cannot
    # match the empty string.
    while True:
        if self.idx >= len(self.s):
            raise StopIteration
        for rule in self.lexer.ignore_rules:
            match = rule.matches(self.s, self.idx)
            if match:
                self.idx = match.end
                break
        else:
            # No ignore rule matched: real input starts here.
            break

    for rule in self.lexer.rules:
        match = rule.matches(self.s, self.idx)
        if match:
            source_pos = self.__get_position__(match.start)
            token = Token(
                rule.name, self.s[match.start:match.end], source_pos)
            self.idx = match.end
            return token

    raise LexingError(None, SourcePosition(self.idx, -1, -1))
def whitespace_empty(self, p):
    """Return an ``H_WHITESPACE`` token with empty text at position 0/0/0.

    ``p`` is accepted but not used.
    """
    origin = SourcePosition(0, 0, 0)
    return Token('H_WHITESPACE', '', origin)
def next_equal_sign(self):
    # Switches to MODE_PHPCODE and returns a T_ECHO token ("echo") placed
    # at the current start index, on line startlineno + 1, column 0.
    self.mode = MODE_PHPCODE
    lineno = self.startlineno + 1
    echo_pos = SourcePosition(self.startindex, lineno, 0)
    return self.lexer.token_class("T_ECHO", "echo", echo_pos)
def test_repr(self):
    """``repr`` of a SourcePosition names its idx, lineno and colno."""
    position = SourcePosition(1, 2, 3)
    expected = "SourcePosition(idx=1, lineno=2, colno=3)"
    assert repr(position) == expected
def current_pos(self):
    """Return a ``SourcePosition`` built from idx, lineno and columno."""
    idx, lineno, columno = self.idx, self.lineno, self.columno
    return SourcePosition(idx, lineno, columno)
def test_eq(self):
    """A Token never compares equal to a non-Token value."""
    nowhere = SourcePosition(-1, -1, -1)
    tok = Token("VALUE", "3", nowhere)
    # Exercise both the == and the != operators.
    assert not (tok == 3)
    assert tok != 3
def test_source_pos(self):
    """``getsourcepos()`` exposes the position the Token was built with."""
    tok = Token("VALUE", "3", SourcePosition(5, 2, 1))
    position = tok.getsourcepos()
    assert position.lineno == 2
def current_pos(self):
    """
    Return the rply SourcePosition object with the current
    index, line number and column number.
    """
    idx, lineno, columno = self.idx, self.lineno, self.columno
    return SourcePosition(idx, lineno, columno)
def current_pos(self):
    """Return a ``SourcePosition`` built from the three position getters."""
    # Getters are called in the same order as the constructor arguments.
    idx = self.get_idx()
    lineno = self.get_lineno()
    columno = self.get_columno()
    return SourcePosition(idx, lineno, columno)
def __init__(self, name, source, source_pos=SourcePosition(0, 0, 0)):
    """Store the name, source and source position on the instance.

    NOTE(review): the default ``source_pos`` is evaluated once, when the
    function is defined, so every call that omits it shares the same
    ``SourcePosition`` object - confirm nothing mutates it.
    """
    self.name, self.source = name, source
    self.source_pos = source_pos
def test_source_pos(self):
    """SourcePosition exposes its arguments as idx, lineno and colno."""
    position = SourcePosition(1, 2, 3)
    observed = (position.idx, position.lineno, position.colno)
    assert observed == (1, 2, 3)