def __init__(self, bnf_text):
    self._bnf_text = bnf_text
    self.generator = tokenize.source_tokens(bnf_text)
    self.gettoken()  # Initialize lookahead
    self.dfas, self.startsymbol = self.parse()
    self.first = {}  # map from symbol name to set of tokens
    self.addfirstsets()
def __init__(self, grammar, source, start_symbol='file_input',
             tokenizer=None, start_parsing=True):
    # TODO: Remove start_parsing (with False)
    self._used_names = {}
    self._scope_names_stack = [{}]
    self._last_failed_start_pos = (0, 0)
    self._global_names = []

    # For the fast parser.
    self.position_modifier = pt.PositionModifier()

    self._added_newline = False
    # The Python grammar needs a newline at the end of each statement.
    if not source.endswith('\n') and start_symbol == 'file_input':
        source += '\n'
        self._added_newline = True

    self._start_symbol = start_symbol
    self._grammar = grammar
    self._parsed = None

    if start_parsing:
        if tokenizer is None:
            tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
        self.parse(tokenizer)
def __init__(self, grammar, source, start_symbol='file_input',
             tokenizer=None, start_parsing=True):
    # TODO: Remove start_parsing (with False)
    self._used_names = {}
    self.source = source

    self._added_newline = False
    # The Python grammar needs a newline at the end of each statement.
    if not source.endswith('\n') and start_symbol == 'file_input':
        source += '\n'
        self._added_newline = True

    self._start_symbol = start_symbol
    self._grammar = grammar
    self._parsed = None

    if start_parsing:
        if tokenizer is None:
            tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
        self.parse(tokenizer)
def tokenize_without_endmarker(code):
    tokens = tokenize.source_tokens(code, use_exact_op_types=True)
    for token_ in tokens:
        if token_.string == safeword:
            raise EndMarkerReached()
        else:
            yield token_
def __init__(self, grammar, source, start_symbol='file_input',
             tokenizer=None, start_parsing=True):
    # TODO: Remove start_parsing (with False)
    self._used_names = {}
    self._scope_names_stack = [{}]
    self._last_failed_start_pos = (0, 0)
    self._global_names = []

    # For the fast parser.
    self.position_modifier = pt.PositionModifier()

    self._added_newline = False
    # The Python grammar needs a newline at the end of each statement.
    if not source.endswith('\n') and start_symbol == 'file_input':
        source += '\n'
        self._added_newline = True

    self.source = source
    self._start_symbol = start_symbol
    self._grammar = grammar
    self._parsed = None

    if start_parsing:
        if tokenizer is None:
            tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
        self.parse(tokenizer)
def test_tokenize_multiline_III(self):
    # Make sure a multiline string containing newlines has the end marker on
    # the following line, even when there are several newlines.
    fundef = '''""""\n\n'''
    tokens = tokenize.source_tokens(fundef)
    token_list = list(tokens)
    assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
                          TokenInfo(ENDMARKER, '', (3, 0), '')]
def test_tokenize_multiline_II(self):
    # Make sure a multiline string without newlines has the end marker on the
    # same line.
    fundef = '''""""'''
    tokens = tokenize.source_tokens(fundef)
    token_list = list(tokens)
    assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
                          TokenInfo(ENDMARKER, '', (1, 4), '')]
def test_simple_no_whitespace(self):
    # Test a simple one-line string with no preceding whitespace.
    simple_docstring = '"""simple one line docstring"""'
    tokens = tokenize.source_tokens(simple_docstring)
    token_list = list(tokens)
    _, value, _, prefix = token_list[0]
    assert prefix == ''
    assert value == '"""simple one line docstring"""'
def test_tokenize_multiline_I(self):
    # Make sure a multiline string containing a newline has the end marker on
    # the following line.
    fundef = '''""""\n'''
    tokens = tokenize.source_tokens(fundef)
    token_list = list(tokens)
    assert token_list == [
        TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
        TokenInfo(ENDMARKER, '', (2, 0), '')
    ]
def __init__(self, source, line_offset=0):
    self.source = source
    self.gen = source_tokens(source, line_offset)
    self.closed = False

    # fast parser options
    self.current = self.previous = Token(None, '', (0, 0))
    self.in_flow = False
    self.new_indent = False
    self.parser_indent = self.old_parser_indent = 0
    self.is_decorator = False
    self.first_stmt = True
def tokenize_without_endmarker(code):
    tokens = tokenize.source_tokens(code, use_exact_op_types=True)
    for token_ in tokens:
        if token_.string == safeword:
            raise EndMarkerReached()
        elif token_.type == token.DEDENT and False:
            # This branch is currently disabled via `and False`.
            # Ignore those tokens: error statements should not contain them.
            # If they do, it's for cases where an indentation happens and we
            # still see them before the endmarker.
            pass
        else:
            yield token_
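# Minimal, hypothetical usage sketch for the generator above. It assumes, as
# the snippet implies, that `safeword` is a sentinel identifier defined in the
# enclosing scope and appended to the code being tokenized; the sentinel value
# and the helper name below are made up for illustration.
safeword = 'ZZZ_COMPLETION_SENTINEL'  # assumed; any unlikely identifier works

def collect_tokens_until_safeword(code):
    collected = []
    try:
        # Appending the sentinel makes tokenization stop right at the cursor.
        for token_ in tokenize_without_endmarker(code + safeword):
            collected.append(token_)
    except EndMarkerReached:
        pass
    return collected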
def _get_under_cursor_stmt(self, cursor_txt):
    tokenizer = source_tokens(cursor_txt, self._pos[0] - 1)
    r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
    try:
        # Take the last statement available.
        stmt = r.module.statements[-1]
    except IndexError:
        raise NotFoundError()
    # Set the start_pos to a pseudo position that doesn't exist but works
    # perfectly well (for both completions in docstrings and statements).
    stmt.start_pos = self._pos
    stmt.parent = self._parser.user_scope()
    return stmt
def test_simple_with_whitespace(self):
    # Test a simple one-line string with preceding whitespace and newline.
    simple_docstring = ' """simple one line docstring""" \r\n'
    tokens = tokenize.source_tokens(simple_docstring)
    token_list = list(tokens)
    assert token_list[0][0] == INDENT
    typ, value, start_pos, prefix = token_list[1]
    assert prefix == ' '
    assert value == '"""simple one line docstring"""'
    assert typ == STRING
    typ, value, start_pos, prefix = token_list[2]
    assert prefix == ' '
    assert typ == NEWLINE
def test_identifier_contains_unicode(self):
    fundef = dedent('''
    def 我あφ():
        pass
    ''')
    tokens = tokenize.source_tokens(fundef)
    token_list = list(tokens)
    unicode_token = token_list[1]
    if is_py3:
        assert unicode_token[0] == NAME
    else:
        # Unicode tokens in Python 2 seem to be identified as operators.
        # They will be ignored in the parser; that's ok.
        assert unicode_token[0] == OP
def __init__(self, source):
    self.source = source
    self._gen = source_tokens(source)
    self._closed = False

    # fast parser options
    self.current = self.previous = NEWLINE, '', (0, 0)
    self._in_flow = False
    self._is_decorator = False
    self._first_stmt = True
    self._parentheses_level = 0
    self._indent_counter = 0
    self._flow_indent_counter = 0
    self._returned_endmarker = False
    self._expect_indent = False
def __init__(self, source, module_path=None, no_docstr=False,
             tokenizer=None, top_module=None):
    self.no_docstr = no_docstr

    tokenizer = tokenizer or tokenize.source_tokens(source)
    self._gen = PushBackTokenizer(tokenizer)

    # initialize global Scope
    start_pos = next(self._gen).start_pos
    self._gen.push_last_back()
    self.module = pr.SubModule(module_path, start_pos, top_module)
    self._scope = self.module
    self._top_module = top_module or self.module

    try:
        self._parse()
    except (common.MultiLevelStopIteration, StopIteration):
        # StopIteration needs to be handled as well, because Python 2 has a
        # strange way of dealing with StopIterations and sometimes they
        # aren't caught. Just ignore it.
        pass

    # on finish, set end_pos correctly
    s = self._scope
    while s is not None:
        s.end_pos = self._gen.current.end_pos
        s = s.parent

    # clean up unused decorators
    for d in self._decorators:
        # Set a parent for unused decorators to avoid an AttributeError
        # because of `self.module.used_names`.
        d.parent = self.module

    self.module.end_pos = self._gen.current.end_pos
    if self._gen.current.type == tokenize.NEWLINE:
        # This case is only relevant with the FastTokenizer, because
        # otherwise there's always an ENDMARKER.
        # We added a newline before, so we need to "remove" it again.
        #
        # NOTE: end_pos should be kept as-is if the last token of the source
        # is a NEWLINE; otherwise the newline at the end of the source is
        # not included in a ParserNode.code.
        if self._gen.previous.type != tokenize.NEWLINE:
            self.module.end_pos = self._gen.previous.end_pos

    del self._gen
def _get_under_cursor_stmt(self, cursor_txt):
    tokenizer = source_tokens(cursor_txt)
    r = Parser(self._grammar, cursor_txt, tokenizer=tokenizer)
    try:
        # Take the last statement available.
        stmt = r.module.statements[-1]
    except IndexError:
        return None

    user_stmt = self._parser.user_stmt()
    if user_stmt is None:
        # Set the start_pos to a pseudo position that doesn't exist but works
        # perfectly well (for both completions in docstrings and statements).
        pos = self._pos
    else:
        pos = user_stmt.start_pos

    stmt.move(pos[0] - 1, pos[1])  # Moving the offset.
    stmt.parent = self._parser.user_scope()
    return stmt
def _get_under_cursor_stmt(self, cursor_txt, start_pos=None):
    tokenizer = source_tokens(cursor_txt)
    r = Parser(self._grammar, cursor_txt, tokenizer=tokenizer)
    try:
        # Take the last statement available that is not an endmarker.
        # And because it's a simple_stmt, we need to get the first child.
        stmt = r.module.children[-2].children[0]
    except (AttributeError, IndexError):
        return None

    user_stmt = self._parser.user_stmt()
    if user_stmt is None:
        # Set the start_pos to a pseudo position that doesn't exist but
        # works perfectly well (for both completions in docstrings and
        # statements).
        pos = start_pos or self._pos
    else:
        pos = user_stmt.start_pos

    stmt.move(pos[0] - 1, pos[1])  # Moving the offset.
    stmt.parent = self._parser.user_scope()
    return stmt
def _get_under_cursor_stmt(self, cursor_txt):
    tokenizer = source_tokens(cursor_txt, line_offset=self._pos[0] - 1)
    r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
    try:
        # Take the last statement available.
        stmt = r.module.statements[-1]
    except IndexError:
        raise NotFoundError()

    if isinstance(stmt, pr.KeywordStatement):
        stmt = stmt.stmt
    if not isinstance(stmt, pr.ExprStmt):
        raise NotFoundError()

    user_stmt = self._parser.user_stmt()
    if user_stmt is None:
        # Set the start_pos to a pseudo position that doesn't exist but works
        # perfectly well (for both completions in docstrings and statements).
        stmt.start_pos = self._pos
    else:
        stmt.start_pos = user_stmt.start_pos
    stmt.parent = self._parser.user_scope()
    return stmt
def test_function_whitespace(self):
    # Test function definition whitespace identification
    fundef = dedent('''
    def test_whitespace(*args, **kwargs):
        x = 1
        if x > 0:
            print(True)
    ''')
    tokens = tokenize.source_tokens(fundef)
    token_list = list(tokens)
    for _, value, _, prefix in token_list:
        if value == 'test_whitespace':
            assert prefix == ' '
        if value == '(':
            assert prefix == ''
        if value == '*':
            assert prefix == ''
        if value == '**':
            assert prefix == ' '
        if value == 'print':
            assert prefix == ' '
        if value == 'if':
            assert prefix == ' '
def __init__(self, grammar, source, module_path=None, tokenizer=None): self._ast_mapping = { 'expr_stmt': pt.ExprStmt, 'classdef': pt.Class, 'funcdef': pt.Function, 'file_input': pt.Module, 'import_name': pt.ImportName, 'import_from': pt.ImportFrom, 'break_stmt': pt.KeywordStatement, 'continue_stmt': pt.KeywordStatement, 'return_stmt': pt.ReturnStmt, 'raise_stmt': pt.KeywordStatement, 'yield_expr': pt.YieldExpr, 'del_stmt': pt.KeywordStatement, 'pass_stmt': pt.KeywordStatement, 'global_stmt': pt.GlobalStmt, 'nonlocal_stmt': pt.KeywordStatement, 'assert_stmt': pt.AssertStmt, 'if_stmt': pt.IfStmt, 'with_stmt': pt.WithStmt, 'for_stmt': pt.ForStmt, 'while_stmt': pt.WhileStmt, 'try_stmt': pt.TryStmt, 'comp_for': pt.CompFor, 'decorator': pt.Decorator, 'lambdef': pt.Lambda, 'old_lambdef': pt.Lambda, 'lambdef_nocond': pt.Lambda, } self.syntax_errors = [] self._global_names = [] self._omit_dedent_list = [] self._indent_counter = 0 self._last_failed_start_pos = (0, 0) # TODO do print absolute import detection here. #try: # del python_grammar_no_print_statement.keywords["print"] #except KeyError: # pass # Doesn't exist in the Python 3 grammar. #if self.options["print_function"]: # python_grammar = pygram.python_grammar_no_print_statement #else: self._used_names = {} self._scope_names_stack = [{}] self._error_statement_stacks = [] added_newline = False # The Python grammar needs a newline at the end of each statement. if not source.endswith('\n'): source += '\n' added_newline = True # For the fast parser. self.position_modifier = pt.PositionModifier() p = PgenParser(grammar, self.convert_node, self.convert_leaf, self.error_recovery) tokenizer = tokenizer or tokenize.source_tokens(source) self.module = p.parse(self._tokenize(tokenizer)) if self.module.type != 'file_input': # If there's only one statement, we get back a non-module. That's # not what we want, we want a module, so we add it here: self.module = self.convert_node(grammar, grammar.symbol2number['file_input'], [self.module]) if added_newline: self.remove_last_newline() self.module.used_names = self._used_names self.module.path = module_path self.module.global_names = self._global_names self.module.error_statement_stacks = self._error_statement_stacks
def _get_token_list(string):
    return list(tokenize.source_tokens(string))
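# Hedged illustration of inspecting the resulting token list. The
# (type, value, start_pos, prefix) unpacking order follows the test snippets
# above; the sample source string is made up.
for typ, value, start_pos, prefix in _get_token_list('x = 1\n'):
    print(typ, repr(value), start_pos, repr(prefix))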
def __init__(self, grammar, source, module_path=None, tokenizer=None): self._ast_mapping = { 'expr_stmt': pt.ExprStmt, 'classdef': pt.Class, 'funcdef': pt.Function, 'file_input': pt.Module, 'import_name': pt.ImportName, 'import_from': pt.ImportFrom, 'break_stmt': pt.KeywordStatement, 'continue_stmt': pt.KeywordStatement, 'return_stmt': pt.ReturnStmt, 'raise_stmt': pt.KeywordStatement, 'yield_expr': pt.YieldExpr, 'del_stmt': pt.KeywordStatement, 'pass_stmt': pt.KeywordStatement, 'global_stmt': pt.GlobalStmt, 'nonlocal_stmt': pt.KeywordStatement, 'assert_stmt': pt.AssertStmt, 'if_stmt': pt.IfStmt, 'with_stmt': pt.WithStmt, 'for_stmt': pt.ForStmt, 'while_stmt': pt.WhileStmt, 'try_stmt': pt.TryStmt, 'comp_for': pt.CompFor, 'decorator': pt.Decorator, 'lambdef': pt.Lambda, 'old_lambdef': pt.Lambda, 'lambdef_nocond': pt.Lambda, } self.syntax_errors = [] self._global_names = [] self._omit_dedent_list = [] self._indent_counter = 0 self._last_failed_start_pos = (0, 0) # TODO do print absolute import detection here. #try: # del python_grammar_no_print_statement.keywords["print"] #except KeyError: # pass # Doesn't exist in the Python 3 grammar. #if self.options["print_function"]: # python_grammar = pygram.python_grammar_no_print_statement #else: self._used_names = {} self._scope_names_stack = [{}] self._error_statement_stacks = [] added_newline = False # The Python grammar needs a newline at the end of each statement. if not source.endswith('\n'): source += '\n' added_newline = True # For the fast parser. self.position_modifier = pt.PositionModifier() p = PgenParser(grammar, self.convert_node, self.convert_leaf, self.error_recovery) tokenizer = tokenizer or tokenize.source_tokens(source) self.module = p.parse(self._tokenize(tokenizer)) if self.module.type != 'file_input': # If there's only one statement, we get back a non-module. That's # not what we want, we want a module, so we add it here: self.module = self.convert_node( grammar, grammar.symbol2number['file_input'], [self.module]) if added_newline: self.remove_last_newline() self.module.used_names = self._used_names self.module.path = module_path self.module.global_names = self._global_names self.module.error_statement_stacks = self._error_statement_stacks