Example #1
0
    def test_token_methods(self):
        # Test the methods that deal with tokens: prev/next_token, get_token, get_token_from_offset.
        source = "import re  # comment\n\nfoo = 'bar'\n"
        atok = asttokens.ASTTokens(source)
        # Pin down the token layout first; the later assertions rely on these
        # indices. From the checks below: tokens[2] is the COMMENT, tokens[4]
        # the blank-line NL, and tokens[9] the ENDMARKER.
        self.assertEqual(str(atok.tokens[3]), "NEWLINE:'\\n'")
        self.assertEqual(str(atok.tokens[4]), "NL:'\\n'")
        self.assertEqual(str(atok.tokens[5]), "NAME:'foo'")
        self.assertEqual(str(atok.tokens[6]), "OP:'='")
        # prev_token/next_token skip non-coding tokens (COMMENT, NL) unless
        # include_extra=True is passed.
        self.assertEqual(atok.prev_token(atok.tokens[5]), atok.tokens[3])
        self.assertEqual(atok.prev_token(atok.tokens[5], include_extra=True),
                         atok.tokens[4])
        self.assertEqual(atok.next_token(atok.tokens[5]), atok.tokens[6])
        self.assertEqual(atok.next_token(atok.tokens[1]), atok.tokens[3])
        self.assertEqual(atok.next_token(atok.tokens[1], include_extra=True),
                         atok.tokens[2])

        # get_token_from_offset: offset 22 is where 'foo' starts in `source`;
        # offsets through 25 (just past 'foo') still map to the 'foo' token,
        # and 26 (the '=') maps to the next one.
        self.assertEqual(atok.get_token_from_offset(21), atok.tokens[4])
        self.assertEqual(atok.get_token_from_offset(22), atok.tokens[5])
        self.assertEqual(atok.get_token_from_offset(23), atok.tokens[5])
        self.assertEqual(atok.get_token_from_offset(24), atok.tokens[5])
        self.assertEqual(atok.get_token_from_offset(25), atok.tokens[5])
        self.assertEqual(atok.get_token_from_offset(26), atok.tokens[6])

        # get_token: same mapping expressed as (line, column) instead of
        # absolute offset; 'foo' is at line 3, columns 0-3.
        self.assertEqual(atok.get_token(2, 0), atok.tokens[4])
        self.assertEqual(atok.get_token(3, 0), atok.tokens[5])
        self.assertEqual(atok.get_token(3, 1), atok.tokens[5])
        self.assertEqual(atok.get_token(3, 2), atok.tokens[5])
        self.assertEqual(atok.get_token(3, 3), atok.tokens[5])
        self.assertEqual(atok.get_token(3, 4), atok.tokens[6])

        # token_range with include_extra=True yields every token in the span,
        # non-coding ones included.
        self.assertEqual(
            list(
                atok.token_range(atok.tokens[4],
                                 atok.tokens[6],
                                 include_extra=True)), atok.tokens[4:7])

        # Verify that find_token works, including for non-coding tokens.
        self.assertEqual(atok.find_token(atok.tokens[3], token.NAME, 'foo'),
                         atok.tokens[5])
        # 'foo' only appears after tokens[3], so the reverse search fails and,
        # per the assertion, ends on tokens[9] — the ENDMARKER.
        self.assertEqual(
            atok.find_token(atok.tokens[3], token.NAME, 'foo', reverse=True),
            atok.tokens[9])
        self.assertEqual(
            atok.find_token(atok.tokens[3], token.NAME, reverse=True),
            atok.tokens[1])
        # The only COMMENT (tokens[2]) is before tokens[5]: the forward search
        # fails (ENDMARKER), while the reverse search finds it — showing
        # find_token can land on non-coding tokens.
        self.assertEqual(atok.find_token(atok.tokens[5], tokenize.COMMENT),
                         atok.tokens[9])
        self.assertEqual(
            atok.find_token(atok.tokens[5], tokenize.COMMENT, reverse=True),
            atok.tokens[2])
        # A NEWLINE does follow tokens[5], so this search succeeds (not EOF)...
        self.assertEqual(atok.find_token(atok.tokens[5], token.NEWLINE),
                         atok.tokens[8])
        self.assertFalse(
            token.ISEOF(
                atok.find_token(atok.tokens[5], tokenize.NEWLINE).type))
        # ...whereas the only NL (tokens[4]) precedes tokens[5], so a forward
        # NL search fails and returns the ENDMARKER (ISEOF is the documented
        # way to detect a failed find_token).
        self.assertEqual(atok.find_token(atok.tokens[5], tokenize.NL),
                         atok.tokens[9])
        self.assertTrue(
            token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NL).type))
Example #2
0
 def _find_last_in_stmt(self, start_token):
     # Scan forward until a statement terminator (NEWLINE or ';') or the end
     # of the stream, then step back one token: that is the statement's last
     # token.
     def _ends_stmt(tok):
         return (util.match_token(tok, token.NEWLINE)
                 or util.match_token(tok, token.OP, ';')
                 or token.ISEOF(tok.type))

     cur = start_token
     while not _ends_stmt(cur):
         cur = self._code.next_token(cur, include_extra=True)
     return self._code.prev_token(cur)
Example #3
0
 def find_token(self, start_token, tok_type, tok_str=None, reverse=False):
     """
     Looks for the first token, starting at start_token, that matches tok_type and, if given, the
     token string. Searches backwards if reverse is True. Returns the ENDMARKER token if no match
     is found (you can check it with `token.ISEOF(t.type)`).
     """
     t = start_token
     advance = self.prev_token if reverse else self.next_token
     # include_extra=True lets the search land on non-coding tokens (COMMENT,
     # NL); without it prev/next_token skip them, so searching for those token
     # types could never succeed.
     while not match_token(t, tok_type, tok_str) and not token.ISEOF(t.type):
         t = advance(t, include_extra=True)
     return t
 def find_token(self, start_token, tok_type, tok_str=None, reverse=False):
   """
   Looks for the first token, starting at start_token, that matches tok_type and, if given, the
   token string. Searches backwards if reverse is True. Returns ENDMARKER token if not found (you
   can check it with `token.ISEOF(t.type)`.
   """
   step = self.prev_token if reverse else self.next_token
   tok = start_token
   # Walk until a match or the stream runs out; a match on the very first
   # token (even the ENDMARKER itself) is returned as-is.
   while not token.ISEOF(tok.type):
     if match_token(tok, tok_type, tok_str):
       break
     tok = step(tok, include_extra=True)
   return tok
Example #5
0
def parse_unenclosed_expression(text, start_pos, position):
    """
    Parse a '$'-prefixed expression that is not wrapped in braces, starting
    just after start_pos. Dotted-name continuations extend the expression; a
    brace opener hands off to the tokenizer, which consumes tokens until the
    matching closer. Returns a PythonExpression for the consumed source text.
    """
    match_obj = identifier_match.match(text, start_pos + 1)
    cursor = match_obj.end(0)
    open_stack = []
    while True:
        match_obj = expr_continuation.match(text, cursor)
        if match_obj is None:
            break

        if match_obj.group(1):
            # It was a dotted part; extend the expression and keep scanning.
            cursor = match_obj.end(0)
            continue

        # A braced expression is started; consume tokens up to the closer.
        for tok_type, tok_text, tok_end in gen_tokens(text, cursor):
            if tok_text in braces:
                open_stack.append(tok_text)

            elif tok_text in braces.values():
                opener = open_stack.pop()
                if braces[opener] != tok_text:
                    raise TemplateSyntaxError(
                        "Syntax error parsing interpolated expression",
                        node=text[tok_end - 1:])

                if not open_stack:
                    cursor = tok_end
                    break

            elif token.ISEOF(tok_type) or tok_type == token.ERRORTOKEN:
                raise TemplateSyntaxError(
                    "Syntax error parsing interpolated expression",
                    node=text[tok_end:])

    expr = text[start_pos + 1:cursor]
    return PythonExpression('$' + expr, expr)
Example #6
0
def processTerminal(ctx, cst):
    """ Process a given CST tuple representing a terminal symbol.

    ctx -- processing context (current line, indent level, and symbol marks)
    cst -- terminal node tuple: (token type, token text, line number)

    Returns the line number on which this terminal symbol ended.
    """
    # NOTE: kwlist and strings_as_symbols are only read here, never assigned,
    # so the original `global` declaration was unnecessary and is dropped.

    # Remember on what line this terminal symbol ended
    lineno = int(cst[2])

    if cst[0] == token.DEDENT:
        # Indentation is not recorded, but still processed. A
        # dedent is handled before we process any line number
        # changes so that we can properly mark the end of a
        # function.
        ctx.indent_lvl -= 1
        if ctx.indent_lvl == ctx.func_def_lvl:
            ctx.func_def_lvl = -1
            ctx.line += Symbol('', Mark.FUNC_END)
        return lineno

    if (lineno != ctx.line.lineno) and (cst[0] != token.STRING):
        # Handle a token on a new line without seeing a NEWLINE
        # token (line continuation with backslash). Skip this for
        # STRINGs so that a display utility can display Python
        # multi-line strings.
        ctx.commit(lineno)

    # Handle tokens
    if cst[0] == token.NEWLINE:
        # Handle new line tokens: we ignore them as a change in
        # the line number for a token will commit a line (or EOF,
        # see below).
        pass
    elif cst[0] == token.INDENT:
        # Indentation is not recorded, but still processed
        ctx.indent_lvl += 1
    elif cst[0] == token.STRING:
        _processString(ctx, cst[1])
    elif cst[0] == token.NAME:
        _processName(ctx, cst)
    elif (cst[0] == token.DOT) and (id(cst) in ctx.marks):
        # Add the "." to the include symbol, as we are
        # building a larger symbol from all the dotted names
        ctx.line += Symbol(cst[1], ctx.getMark(cst))
    elif token.ISEOF(cst[0]):
        # End of compilation: consume this token without adding it
        # to the line, committing any line being processed.
        ctx.commit()
    else:
        # All other tokens are simply added to the line
        ctx.line += NonSymbol(cst[1])

    return lineno


def _processString(ctx, text):
    """ Append a STRING terminal to the current line.

    When strings_as_symbols is set, a string literal whose contents form a
    valid Python identifier is emitted as a Symbol (with its quotes as
    NonSymbols); every other string is plain NonSymbol text with embedded
    newlines escaped.
    """
    if strings_as_symbols:
        m = re.search(
            "^('|\"|'''|\"\"\")([A-Za-z_][A-Za-z_0-9]*)('|\"|'''|\"\"\")$",
            text)
        if m is not None:
            # We have a string that is a valid Python identifier, emit the
            # enclosing quotes as non-symbols and the string as a symbol.
            ctx.line += NonSymbol(m.group(1))
            ctx.line += Symbol(m.group(2))
            ctx.line += NonSymbol(m.group(3))
            return
    # Make sure newline's within strings are escaped.
    ctx.line += NonSymbol(text.replace("\n", "\\n"))


def _processName(ctx, cst):
    """ Append a NAME terminal: keywords become NonSymbol text (unless
    explicitly marked), anything else becomes a Symbol.
    """
    if cst[1] in kwlist:
        if id(cst) in ctx.marks:
            # Perhaps print statement used as a function?
            ctx.line += Symbol(cst[1], ctx.getMark(cst))
        else:
            # Python keywords are treated as non-symbol text
            ctx.line += NonSymbol(cst[1])
    elif id(cst) in ctx.marks:
        # Not a python keyword; marked symbol text
        ctx.line += Symbol(cst[1], ctx.getMark(cst))
    else:
        # Not a python keyword; plain symbol text
        ctx.line += Symbol(cst[1])
Example #7
0
 def update_event(self, inp=-1):
     """Recompute output 0: whether input 0 is an end-of-stream token type."""
     is_eof = token.ISEOF(self.input(0))
     self.set_output_val(0, is_eof)