Example #1
 def __init__(self, bnf_text):
     self._bnf_text = bnf_text
     self.generator = tokenize.source_tokens(bnf_text)
     self.gettoken()  # Initialize lookahead
     self.dfas, self.startsymbol = self.parse()
     self.first = {}  # map from symbol name to set of tokens
     self.addfirstsets()
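The constructor above keeps a one-token lookahead that gettoken() advances. A minimal sketch of such a helper, assuming the generator returned by source_tokens yields (type, string, start_pos, prefix) tuples as the test examples further down show; the attribute names are illustrative, not taken from the example:

 def gettoken(self):
     # Pull the next token from the generator and store it as the current
     # lookahead. The tuple shape is assumed from the other examples on
     # this page.
     self.type, self.value, self.begin, self.prefix = next(self.generator)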
Example #2
    def __init__(self, grammar, source, start_symbol='file_input',
                 tokenizer=None, start_parsing=True):
        # TODO: Remove start_parsing (with False)

        self._used_names = {}
        self._scope_names_stack = [{}]
        self._last_failed_start_pos = (0, 0)
        self._global_names = []

        # For the fast parser.
        self.position_modifier = pt.PositionModifier()

        self._added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n') and start_symbol == 'file_input':
            source += '\n'
            self._added_newline = True

        self._start_symbol = start_symbol
        self._grammar = grammar

        self._parsed = None

        if start_parsing:
            if tokenizer is None:
                tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
            self.parse(tokenizer)
Example #3
    def __init__(self,
                 grammar,
                 source,
                 start_symbol='file_input',
                 tokenizer=None,
                 start_parsing=True):
        # TODO: Remove start_parsing (with False)

        self._used_names = {}

        self.source = source
        self._added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n') and start_symbol == 'file_input':
            source += '\n'
            self._added_newline = True

        self._start_symbol = start_symbol
        self._grammar = grammar

        self._parsed = None

        if start_parsing:
            if tokenizer is None:
                tokenizer = tokenize.source_tokens(source,
                                                   use_exact_op_types=True)
            self.parse(tokenizer)
Example #4
 def tokenize_without_endmarker(code):
     tokens = tokenize.source_tokens(code, use_exact_op_types=True)
     for token_ in tokens:
         if token_.string == safeword:
             raise EndMarkerReached()
         else:
             yield token_
Example #6
    def __init__(self,
                 grammar,
                 source,
                 start_symbol='file_input',
                 tokenizer=None,
                 start_parsing=True):
        # TODO: Remove start_parsing (with False)

        self._used_names = {}
        self._scope_names_stack = [{}]
        self._last_failed_start_pos = (0, 0)
        self._global_names = []

        # For the fast parser.
        self.position_modifier = pt.PositionModifier()

        self._added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n') and start_symbol == 'file_input':
            source += '\n'
            self._added_newline = True

        self.source = source
        self._start_symbol = start_symbol
        self._grammar = grammar

        self._parsed = None

        if start_parsing:
            if tokenizer is None:
                tokenizer = tokenize.source_tokens(source,
                                                   use_exact_op_types=True)
            self.parse(tokenizer)
Example #7
 def test_tokenize_multiline_III(self):
     # Make sure a multiline string containing newlines has the end marker on
     # the next line, even if there are several newlines.
     fundef = '''""""\n\n'''
     tokens = tokenize.source_tokens(fundef)
     token_list = list(tokens)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
                           TokenInfo(ENDMARKER,          '', (3, 0), '')]
Example #8
 def test_tokenize_multiline_II(self):
     # Make sure a multiline string without newlines has the end marker on
     # the same line.
     fundef = '''""""'''
     tokens = tokenize.source_tokens(fundef)
     token_list = list(tokens)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
                           TokenInfo(ENDMARKER,      '', (1, 4), '')]
Example #9
 def test_simple_no_whitespace(self):
     # Test a simple one line string, no preceding whitespace
     simple_docstring = '"""simple one line docstring"""'
     tokens = tokenize.source_tokens(simple_docstring)
     token_list = list(tokens)
     _, value, _, prefix = token_list[0]
     assert prefix == ''
     assert value == '"""simple one line docstring"""'
Example #11
 def test_tokenize_multiline_I(self):
     # Make sure a multiline string containing newlines has the end marker on
     # the next line.
     fundef = '''""""\n'''
     tokens = tokenize.source_tokens(fundef)
     token_list = list(tokens)
     assert token_list == [
         TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
         TokenInfo(ENDMARKER, '', (2, 0), '')
     ]
Example #12
    def __init__(self, source, line_offset=0):
        self.source = source
        self.gen = source_tokens(source, line_offset)
        self.closed = False

        # fast parser options
        self.current = self.previous = Token(None, '', (0, 0))
        self.in_flow = False
        self.new_indent = False
        self.parser_indent = self.old_parser_indent = 0
        self.is_decorator = False
        self.first_stmt = True
Example #13
 def tokenize_without_endmarker(code):
     tokens = tokenize.source_tokens(code, use_exact_op_types=True)
     for token_ in tokens:
         if token_.string == safeword:
             raise EndMarkerReached()
         elif token_.type == token.DEDENT and False:
             # Ignore these. Error statements should not contain them; if
             # they do, it is because an indentation happened and we still
             # see the DEDENTs before the endmarker.
             pass
         else:
             yield token_
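The tokenize_without_endmarker variants above (Examples #4 and #13) stop by raising an exception as soon as a sentinel token shows up in the stream. A sketch of how such a generator might be driven; the safeword value and the EndMarkerReached class are assumptions here, since the examples reference them without defining them:

class EndMarkerReached(Exception):
    pass

safeword = 'ZZZ_USER_WANTS_TO_COMPLETE_HERE'

def tokens_until_safeword(code):
    # Append the sentinel, then collect tokens until the generator raises
    # EndMarkerReached, which is the expected way for it to stop.
    collected = []
    try:
        for token_ in tokenize_without_endmarker(code + ' ' + safeword):
            collected.append(token_)
    except EndMarkerReached:
        pass
    return collected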
Example #16
 def _get_under_cursor_stmt(self, cursor_txt):
     tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
     r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
     try:
         # Take the last statement available.
         stmt = r.module.statements[-1]
     except IndexError:
         raise NotFoundError()
     # Set the start_pos to a pseudo position, that doesn't exist but works
     # perfectly well (for both completions in docstrings and statements).
     stmt.start_pos = self._pos
     stmt.parent = self._parser.user_scope()
     return stmt
Example #17
 def test_simple_with_whitespace(self):
     # Test a simple one line string with preceding whitespace and newline
     simple_docstring = '  """simple one line docstring""" \r\n'
     tokens = tokenize.source_tokens(simple_docstring)
     token_list = list(tokens)
     assert token_list[0][0] == INDENT
     typ, value, start_pos, prefix = token_list[1]
     assert prefix == '  '
     assert value == '"""simple one line docstring"""'
     assert typ == STRING
     typ, value, start_pos, prefix = token_list[2]
     assert prefix == ' '
     assert typ == NEWLINE
Example #20
 def test_identifier_contains_unicode(self):
     fundef = dedent('''
     def 我あφ():
         pass
     ''')
     tokens = tokenize.source_tokens(fundef)
     token_list = list(tokens)
     unicode_token = token_list[1]
     if is_py3:
         assert unicode_token[0] == NAME
     else:
         # Unicode tokens in Python 2 seem to be identified as operators.
         # They will be ignored in the parser, that's ok.
         assert unicode_token[0] == OP
Example #22
    def __init__(self, source):
        self.source = source
        self._gen = source_tokens(source)
        self._closed = False

        # fast parser options
        self.current = self.previous = NEWLINE, '', (0, 0)
        self._in_flow = False
        self._is_decorator = False
        self._first_stmt = True
        self._parentheses_level = 0
        self._indent_counter = 0
        self._flow_indent_counter = 0
        self._returned_endmarker = False
        self._expect_indent = False
Example #24
    def __init__(self, source, module_path=None, no_docstr=False,
                 tokenizer=None, top_module=None):
        self.no_docstr = no_docstr

        tokenizer = tokenizer or tokenize.source_tokens(source)
        self._gen = PushBackTokenizer(tokenizer)

        # initialize global Scope
        start_pos = next(self._gen).start_pos
        self._gen.push_last_back()
        self.module = pr.SubModule(module_path, start_pos, top_module)
        self._scope = self.module
        self._top_module = top_module or self.module

        try:
            self._parse()
        except (common.MultiLevelStopIteration, StopIteration):
            # StopIteration needs to be caught as well, because Python 2 has a
            # strange way of handling StopIteration: sometimes it simply isn't
            # caught. Just ignore it.

            # on finish, set end_pos correctly
            pass
        s = self._scope
        while s is not None:
            s.end_pos = self._gen.current.end_pos
            s = s.parent

        # clean up unused decorators
        for d in self._decorators:
            # set a parent for unused decorators, avoid NullPointerException
            # because of `self.module.used_names`.
            d.parent = self.module

        self.module.end_pos = self._gen.current.end_pos
        if self._gen.current.type == tokenize.NEWLINE:
            # This case is only relevant with the FastTokenizer, because
            # otherwise there's always an ENDMARKER.
            # we added a newline before, so we need to "remove" it again.
            #
            # NOTE: end_pos should be kept as-is if the last token of the
            # source is a NEWLINE, otherwise the newline at the end of the
            # source is not included in ParserNode.code.
            if self._gen.previous.type != tokenize.NEWLINE:
                self.module.end_pos = self._gen.previous.end_pos

        del self._gen
Example #25
    def _get_under_cursor_stmt(self, cursor_txt):
        tokenizer = source_tokens(cursor_txt)
        r = Parser(self._grammar, cursor_txt, tokenizer=tokenizer)
        try:
            # Take the last statement available.
            stmt = r.module.statements[-1]
        except IndexError:
            return None

        user_stmt = self._parser.user_stmt()
        if user_stmt is None:
            # Set the start_pos to a pseudo position, that doesn't exist but works
            # perfectly well (for both completions in docstrings and statements).
            pos = self._pos
        else:
            pos = user_stmt.start_pos

        stmt.move(pos[0] - 1, pos[1])  # Moving the offset.
        stmt.parent = self._parser.user_scope()
        return stmt
Example #26
    def _get_under_cursor_stmt(self, cursor_txt, start_pos=None):
        tokenizer = source_tokens(cursor_txt)
        r = Parser(self._grammar, cursor_txt, tokenizer=tokenizer)
        try:
            # Take the last statement available that is not an endmarker.
            # And because it's a simple_stmt, we need to get the first child.
            stmt = r.module.children[-2].children[0]
        except (AttributeError, IndexError):
            return None

        user_stmt = self._parser.user_stmt()
        if user_stmt is None:
            # Set the start_pos to a pseudo position, that doesn't exist but
            # works perfectly well (for both completions in docstrings and
            # statements).
            pos = start_pos or self._pos
        else:
            pos = user_stmt.start_pos

        stmt.move(pos[0] - 1, pos[1])  # Moving the offset.
        stmt.parent = self._parser.user_scope()
        return stmt
Example #27
    def _get_under_cursor_stmt(self, cursor_txt):
        tokenizer = source_tokens(cursor_txt, line_offset=self._pos[0] - 1)
        r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
        try:
            # Take the last statement available.
            stmt = r.module.statements[-1]
        except IndexError:
            raise NotFoundError()
        if isinstance(stmt, pr.KeywordStatement):
            stmt = stmt.stmt
        if not isinstance(stmt, pr.ExprStmt):
            raise NotFoundError()

        user_stmt = self._parser.user_stmt()
        if user_stmt is None:
            # Set the start_pos to a pseudo position, that doesn't exist but works
            # perfectly well (for both completions in docstrings and statements).
            stmt.start_pos = self._pos
        else:
            stmt.start_pos = user_stmt.start_pos
        stmt.parent = self._parser.user_scope()
        return stmt
Example #28
    def __init__(self, grammar, source, start_symbol='file_input',
                 tokenizer=None, start_parsing=True):
        # TODO: Remove start_parsing (with False)

        self._used_names = {}

        self.source = source
        self._added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n') and start_symbol == 'file_input':
            source += '\n'
            self._added_newline = True

        self._start_symbol = start_symbol
        self._grammar = grammar

        self._parsed = None

        if start_parsing:
            if tokenizer is None:
                tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
            self.parse(tokenizer)
Example #30
 def test_function_whitespace(self):
     # Test function definition whitespace identification
     fundef = dedent('''
     def test_whitespace(*args, **kwargs):
         x = 1
         if x > 0:
             print(True)
     ''')
     tokens = tokenize.source_tokens(fundef)
     token_list = list(tokens)
     for _, value, _, prefix in token_list:
         if value == 'test_whitespace':
             assert prefix == ' '
         if value == '(':
             assert prefix == ''
         if value == '*':
             assert prefix == ''
         if value == '**':
             assert prefix == ' '
         if value == 'print':
             assert prefix == '        '
         if value == 'if':
             assert prefix == '    '
Example #32
    def __init__(self, grammar, source, module_path=None, tokenizer=None):
        self._ast_mapping = {
            'expr_stmt': pt.ExprStmt,
            'classdef': pt.Class,
            'funcdef': pt.Function,
            'file_input': pt.Module,
            'import_name': pt.ImportName,
            'import_from': pt.ImportFrom,
            'break_stmt': pt.KeywordStatement,
            'continue_stmt': pt.KeywordStatement,
            'return_stmt': pt.ReturnStmt,
            'raise_stmt': pt.KeywordStatement,
            'yield_expr': pt.YieldExpr,
            'del_stmt': pt.KeywordStatement,
            'pass_stmt': pt.KeywordStatement,
            'global_stmt': pt.GlobalStmt,
            'nonlocal_stmt': pt.KeywordStatement,
            'assert_stmt': pt.AssertStmt,
            'if_stmt': pt.IfStmt,
            'with_stmt': pt.WithStmt,
            'for_stmt': pt.ForStmt,
            'while_stmt': pt.WhileStmt,
            'try_stmt': pt.TryStmt,
            'comp_for': pt.CompFor,
            'decorator': pt.Decorator,
            'lambdef': pt.Lambda,
            'old_lambdef': pt.Lambda,
            'lambdef_nocond': pt.Lambda,
        }

        self.syntax_errors = []

        self._global_names = []
        self._omit_dedent_list = []
        self._indent_counter = 0
        self._last_failed_start_pos = (0, 0)

        # TODO do print absolute import detection here.
        #try:
        #    del python_grammar_no_print_statement.keywords["print"]
        #except KeyError:
        #    pass  # Doesn't exist in the Python 3 grammar.

        #if self.options["print_function"]:
        #    python_grammar = pygram.python_grammar_no_print_statement
        #else:
        self._used_names = {}
        self._scope_names_stack = [{}]
        self._error_statement_stacks = []

        added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n'):
            source += '\n'
            added_newline = True

        # For the fast parser.
        self.position_modifier = pt.PositionModifier()
        p = PgenParser(grammar, self.convert_node, self.convert_leaf,
                       self.error_recovery)
        tokenizer = tokenizer or tokenize.source_tokens(source)
        self.module = p.parse(self._tokenize(tokenizer))
        if self.module.type != 'file_input':
            # If there's only one statement, we get back a non-module. That's
            # not what we want; we want a module, so we add one here:
            self.module = self.convert_node(grammar,
                                            grammar.symbol2number['file_input'],
                                            [self.module])

        if added_newline:
            self.remove_last_newline()
        self.module.used_names = self._used_names
        self.module.path = module_path
        self.module.global_names = self._global_names
        self.module.error_statement_stacks = self._error_statement_stacks
Example #33
def _get_token_list(string):
    return list(tokenize.source_tokens(string))
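Each token that source_tokens yields unpacks into four fields, as the tests above show: the token type, the string value, the start position, and the whitespace prefix. A small sketch of dumping them for inspection; the sample source string is arbitrary:

def dump_tokens(string):
    # (type, string, start_pos, prefix) is the tuple shape used throughout
    # the examples on this page.
    for typ, value, start_pos, prefix in tokenize.source_tokens(string):
        print(typ, repr(value), start_pos, repr(prefix))

dump_tokens('x = 1\n')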
Example #35
    def __init__(self, grammar, source, module_path=None, tokenizer=None):
        self._ast_mapping = {
            'expr_stmt': pt.ExprStmt,
            'classdef': pt.Class,
            'funcdef': pt.Function,
            'file_input': pt.Module,
            'import_name': pt.ImportName,
            'import_from': pt.ImportFrom,
            'break_stmt': pt.KeywordStatement,
            'continue_stmt': pt.KeywordStatement,
            'return_stmt': pt.ReturnStmt,
            'raise_stmt': pt.KeywordStatement,
            'yield_expr': pt.YieldExpr,
            'del_stmt': pt.KeywordStatement,
            'pass_stmt': pt.KeywordStatement,
            'global_stmt': pt.GlobalStmt,
            'nonlocal_stmt': pt.KeywordStatement,
            'assert_stmt': pt.AssertStmt,
            'if_stmt': pt.IfStmt,
            'with_stmt': pt.WithStmt,
            'for_stmt': pt.ForStmt,
            'while_stmt': pt.WhileStmt,
            'try_stmt': pt.TryStmt,
            'comp_for': pt.CompFor,
            'decorator': pt.Decorator,
            'lambdef': pt.Lambda,
            'old_lambdef': pt.Lambda,
            'lambdef_nocond': pt.Lambda,
        }

        self.syntax_errors = []

        self._global_names = []
        self._omit_dedent_list = []
        self._indent_counter = 0
        self._last_failed_start_pos = (0, 0)

        # TODO do print absolute import detection here.
        #try:
        #    del python_grammar_no_print_statement.keywords["print"]
        #except KeyError:
        #    pass  # Doesn't exist in the Python 3 grammar.

        #if self.options["print_function"]:
        #    python_grammar = pygram.python_grammar_no_print_statement
        #else:
        self._used_names = {}
        self._scope_names_stack = [{}]
        self._error_statement_stacks = []

        added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n'):
            source += '\n'
            added_newline = True

        # For the fast parser.
        self.position_modifier = pt.PositionModifier()
        p = PgenParser(grammar, self.convert_node, self.convert_leaf,
                       self.error_recovery)
        tokenizer = tokenizer or tokenize.source_tokens(source)
        self.module = p.parse(self._tokenize(tokenizer))
        if self.module.type != 'file_input':
            # If there's only one statement, we get back a non-module. That's
            # not what we want; we want a module, so we add one here:
            self.module = self.convert_node(
                grammar, grammar.symbol2number['file_input'], [self.module])

        if added_newline:
            self.remove_last_newline()
        self.module.used_names = self._used_names
        self.module.path = module_path
        self.module.global_names = self._global_names
        self.module.error_statement_stacks = self._error_statement_stacks