def test_token_methods(self):
    """Exercise the token-navigation API: prev_token/next_token, get_token,
    get_token_from_offset, token_range, and find_token (including for
    non-coding tokens such as COMMENT and NL)."""
    source = "import re # comment\n\nfoo = 'bar'\n"
    atok = asttokens.ASTTokens(source)

    # Sanity-check the token indices that the assertions below rely on.
    for idx, rep in [(3, "NEWLINE:'\\n'"), (4, "NL:'\\n'"),
                     (5, "NAME:'foo'"), (6, "OP:'='")]:
        self.assertEqual(str(atok.tokens[idx]), rep)

    # prev_token / next_token skip non-coding tokens unless include_extra is set.
    self.assertEqual(atok.prev_token(atok.tokens[5]), atok.tokens[3])
    self.assertEqual(atok.prev_token(atok.tokens[5], include_extra=True), atok.tokens[4])
    self.assertEqual(atok.next_token(atok.tokens[5]), atok.tokens[6])
    self.assertEqual(atok.next_token(atok.tokens[1]), atok.tokens[3])
    self.assertEqual(atok.next_token(atok.tokens[1], include_extra=True), atok.tokens[2])

    # get_token_from_offset: character offset -> token.
    for offset, idx in [(21, 4), (22, 5), (23, 5), (24, 5), (25, 5), (26, 6)]:
        self.assertEqual(atok.get_token_from_offset(offset), atok.tokens[idx])

    # get_token: (line, column) -> token.
    for (row, col), idx in [((2, 0), 4), ((3, 0), 5), ((3, 1), 5),
                            ((3, 2), 5), ((3, 3), 5), ((3, 4), 6)]:
        self.assertEqual(atok.get_token(row, col), atok.tokens[idx])

    self.assertEqual(
        list(atok.token_range(atok.tokens[4], atok.tokens[6], include_extra=True)),
        atok.tokens[4:7])

    # Verify that find_token works, including for non-coding tokens.
    self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, 'foo'), atok.tokens[5])
    self.assertEqual(
        atok.find_token(atok.tokens[3], token.NAME, 'foo', reverse=True), atok.tokens[9])
    self.assertEqual(
        atok.find_token(atok.tokens[3], token.NAME, reverse=True), atok.tokens[1])
    self.assertEqual(atok.find_token(atok.tokens[5], tokenize.COMMENT), atok.tokens[9])
    self.assertEqual(
        atok.find_token(atok.tokens[5], tokenize.COMMENT, reverse=True), atok.tokens[2])
    self.assertEqual(atok.find_token(atok.tokens[5], token.NEWLINE), atok.tokens[8])
    self.assertFalse(
        token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NEWLINE).type))
    self.assertEqual(atok.find_token(atok.tokens[5], tokenize.NL), atok.tokens[9])
    self.assertTrue(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NL).type))
def _find_last_in_stmt(self, start_token):
    """Scan forward from start_token to the token that ends the current
    statement (NEWLINE, ';', or end-of-file) and return the token just
    before it, i.e. the last token belonging to the statement."""
    def ends_statement(tok):
        # A statement is terminated by a NEWLINE token, a ';' operator,
        # or the end of the token stream.
        return (util.match_token(tok, token.NEWLINE)
                or util.match_token(tok, token.OP, ';')
                or token.ISEOF(tok.type))

    cur = start_token
    while not ends_statement(cur):
        cur = self._code.next_token(cur, include_extra=True)
    return self._code.prev_token(cur)
def find_token(self, start_token, tok_type, tok_str=None, reverse=False):
    """
    Looks for the first token, starting at start_token, that matches tok_type
    and, if given, the token string. Searches backwards if reverse is True.
    Returns the ENDMARKER token if no match is found (check with
    `token.ISEOF(t.type)`).
    """
    t = start_token
    advance = self.prev_token if reverse else self.next_token
    # Advance with include_extra=True so that non-coding tokens (COMMENT, NL)
    # are visited too; otherwise searches for those token types could never
    # succeed, since prev_token/next_token skip them by default.
    while not match_token(t, tok_type, tok_str) and not token.ISEOF(t.type):
        t = advance(t, include_extra=True)
    return t
def find_token(self, start_token, tok_type, tok_str=None, reverse=False):
    """
    Looks for the first token, starting at start_token, that matches tok_type
    and, if given, the token string. Searches backwards if reverse is True.
    Returns ENDMARKER token if not found (you can check it with
    `token.ISEOF(t.type)`).
    """
    # Pick the stepping direction once; include_extra=True makes the walk
    # visit non-coding tokens (comments, NL) as well.
    step = self.prev_token if reverse else self.next_token
    tok = start_token
    while True:
        if match_token(tok, tok_type, tok_str) or token.ISEOF(tok.type):
            return tok
        tok = step(tok, include_extra=True)
def parse_unenclosed_expression(text, start_pos, position):
    """Parse a '$name...'-style interpolated expression that is not enclosed
    in braces, starting at start_pos (the position of the '$' sigil).

    The expression is an identifier optionally extended by dotted parts and/or
    bracketed sub-expressions (as matched by the module-level
    `expr_continuation` regex). Returns a PythonExpression built from the
    consumed source text.

    Raises TemplateSyntaxError on mismatched brackets or when the text ends
    while brackets are still open.

    NOTE(review): `position` is accepted but never read in this body —
    confirm whether callers rely on this exact signature.
    """
    # The identifier immediately following the '$' sigil.
    m = identifier_match.match(text, start_pos + 1)
    pos = m.end(0)
    pars = []  # stack of currently-open bracket characters
    while True:
        # Does the expression continue (a dotted part or an opening bracket)?
        m = expr_continuation.match(text, pos)
        if not m:
            break
        # it was a dotted part; continue
        if m.group(1):
            pos = m.end(0)
            continue
        # a braced expression is started, consume it
        for t_type, content, end_pos in gen_tokens(text, pos):
            if content in braces:
                # Opening bracket: push it so the close can be pair-checked.
                pars.append(content)
            elif content in braces.values():
                # Closing bracket: must pair with the most recent opener.
                last = pars.pop()
                if braces[last] != content:
                    raise TemplateSyntaxError(
                        "Syntax error parsing interpolated expression",
                        node=text[end_pos - 1:])
                if not pars:
                    # All brackets closed — the bracketed part is consumed.
                    pos = end_pos
                    break
            elif token.ISEOF(t_type) or t_type == token.ERRORTOKEN:
                # Ran off the end (or hit a bad token) with brackets open.
                raise TemplateSyntaxError(
                    "Syntax error parsing interpolated expression",
                    node=text[end_pos:])
    # Everything consumed after the '$' is the expression source.
    expr = text[start_pos + 1:pos]
    return PythonExpression('$' + expr, expr)
def processTerminal(ctx, cst):
    """Process a given CST tuple representing a terminal symbol.

    `cst` is indexed as (token type, token text, line number). Emits Symbol /
    NonSymbol fragments onto ctx.line, tracks indentation depth on ctx, and
    commits lines via ctx.commit(). Returns the line number on which this
    terminal ended.
    """
    global kwlist, strings_as_symbols
    # Remember on what line this terminal symbol ended
    lineno = int(cst[2])
    if cst[0] == token.DEDENT:
        # Indentation is not recorded, but still processed. A
        # dedent is handled before we process any line number
        # changes so that we can properly mark the end of a
        # function.
        ctx.indent_lvl -= 1
        if ctx.indent_lvl == ctx.func_def_lvl:
            ctx.func_def_lvl = -1
            ctx.line += Symbol('', Mark.FUNC_END)
        return lineno
    if (lineno != ctx.line.lineno) and (cst[0] != token.STRING):
        # Handle a token on a new line without seeing a NEWLINE
        # token (line continuation with backslash). Skip this for
        # STRINGs so that a display utility can display Python
        # multi-line strings.
        ctx.commit(lineno)
    # Handle tokens
    if cst[0] == token.NEWLINE:
        # Handle new line tokens: we ignore them as a change in
        # the line number for a token will commit a line (or EOF,
        # see below).
        pass
    elif cst[0] == token.INDENT:
        # Indentation is not recorded, but still processed
        ctx.indent_lvl += 1
    elif cst[0] == token.STRING:
        # Handle strings: make sure newline's within strings are
        # escaped.
        if strings_as_symbols:
            # A quoted string whose contents form a valid Python identifier
            # (e.g. '"name"') is treated as a symbol, not literal text.
            m = re.search(
                "^('|\"|'''|\"\"\")([A-Za-z_][A-Za-z_0-9]*)('|\"|'''|\"\"\")$",
                cst[1])
            if m is not None:
                # We have a string that is a valid Python identifier, emit the
                # enclosing quotes as non-symbols and the string as a symbol.
                ctx.line += NonSymbol(m.group(1))
                ctx.line += Symbol(m.group(2))
                ctx.line += NonSymbol(m.group(3))
            else:
                ctx.line += NonSymbol(cst[1].replace("\n", "\\n"))
        else:
            ctx.line += NonSymbol(cst[1].replace("\n", "\\n"))
    elif cst[0] == token.NAME:
        # Handle terminal names, could be a python keyword or
        # user defined symbol, or part of a dotted name sequence.
        if cst[1] in kwlist:
            if id(cst) in ctx.marks:
                # Perhaps print statement used as a function?
                ctx.line += Symbol(cst[1], ctx.getMark(cst))
            else:
                # Python keywords are treated as non-symbol text
                ctx.line += NonSymbol(cst[1])
        else:
            # Not a python keyword, symbol text
            if id(cst) in ctx.marks:
                s = Symbol(cst[1], ctx.getMark(cst))
            else:
                s = Symbol(cst[1])
            ctx.line += s
    elif (cst[0] == token.DOT) and (id(cst) in ctx.marks):
        # Add the "." to the include symbol, as we are
        # building a larger symbol from all the dotted names
        ctx.line += Symbol(cst[1], ctx.getMark(cst))
    elif token.ISEOF(cst[0]):
        # End of compilation: consume this token without adding it
        # to the line, committing any line being processed.
        ctx.commit()
    else:
        # All other tokens are simply added to the line
        ctx.line += NonSymbol(cst[1])
    return lineno
def update_event(self, inp=-1):
    """Recompute this node: publish whether the token type arriving on
    input 0 is the end-of-file marker (token.ISEOF)."""
    is_eof = token.ISEOF(self.input(0))
    self.set_output_val(0, is_eof)