def parse(self, tokenizer):
    if self._parsed is not None:
        return self._parsed

    start_number = self._grammar.symbol2number[self._start_symbol]
    pgen_parser = PgenParser(self._grammar, self.convert_node,
                             self.convert_leaf, self.error_recovery,
                             start_number)
    try:
        self._parsed = pgen_parser.parse(tokenizer)
    finally:
        self.stack = pgen_parser.stack

    if self._start_symbol == 'file_input' != self._parsed.type:
        # If there's only one statement, we get back a non-module. That's
        # not what we want, we want a module, so we add it here:
        self._parsed = self.convert_node(
            self._grammar, self._grammar.symbol2number['file_input'],
            [self._parsed])

    if self._added_newline:
        self.remove_last_newline()
class BaseParser(object):
    node_map = {}
    default_node = tree.Node

    leaf_map = {}
    default_leaf = tree.Leaf

    def __init__(self, grammar, start_symbol='file_input', error_recovery=False):
        self._grammar = grammar
        self._start_symbol = start_symbol
        self._error_recovery = error_recovery

    def parse(self, tokens):
        start_number = self._grammar.symbol2number[self._start_symbol]
        self.pgen_parser = PgenParser(
            self._grammar, self.convert_node, self.convert_leaf,
            self.error_recovery, start_number
        )

        node = self.pgen_parser.parse(tokens)
        # The stack is empty now, we don't need it anymore.
        del self.pgen_parser
        return node

    def error_recovery(self, grammar, stack, arcs, typ, value, start_pos,
                       prefix, add_token_callback):
        if self._error_recovery:
            raise NotImplementedError("Error Recovery is not implemented")
        else:
            raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)

    def convert_node(self, grammar, type_, children):
        # TODO REMOVE symbol, we don't want type here.
        symbol = grammar.number2symbol[type_]
        try:
            return self.node_map[symbol](children)
        except KeyError:
            return self.default_node(symbol, children)

    def convert_leaf(self, grammar, type_, value, prefix, start_pos):
        try:
            return self.leaf_map[type_](value, start_pos, prefix)
        except KeyError:
            return self.default_leaf(value, start_pos, prefix)
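# Usage sketch (not part of the original snippets): a minimal driver for
# BaseParser. The import paths below are assumptions -- load_grammar() and
# tokenize.source_tokens() are referenced elsewhere in this code, but their
# exact location differs between jedi versions.
from jedi.parser import load_grammar, tokenize

grammar = load_grammar()
parser = BaseParser(grammar, start_symbol='file_input', error_recovery=False)
# source_tokens() yields the token stream that PgenParser consumes.
node = parser.parse(tokenize.source_tokens("x = 1\n"))
# Note: BaseParser does not wrap a single statement into a 'file_input' node;
# the Parser classes below take care of that themselves.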
def __init__(self, grammar, source, module_path=None, tokenizer=None):
    self._ast_mapping = {
        'expr_stmt': pt.ExprStmt,
        'classdef': pt.Class,
        'funcdef': pt.Function,
        'file_input': pt.Module,
        'import_name': pt.ImportName,
        'import_from': pt.ImportFrom,
        'break_stmt': pt.KeywordStatement,
        'continue_stmt': pt.KeywordStatement,
        'return_stmt': pt.ReturnStmt,
        'raise_stmt': pt.KeywordStatement,
        'yield_expr': pt.YieldExpr,
        'del_stmt': pt.KeywordStatement,
        'pass_stmt': pt.KeywordStatement,
        'global_stmt': pt.GlobalStmt,
        'nonlocal_stmt': pt.KeywordStatement,
        'assert_stmt': pt.AssertStmt,
        'if_stmt': pt.IfStmt,
        'with_stmt': pt.WithStmt,
        'for_stmt': pt.ForStmt,
        'while_stmt': pt.WhileStmt,
        'try_stmt': pt.TryStmt,
        'comp_for': pt.CompFor,
        'decorator': pt.Decorator,
        'lambdef': pt.Lambda,
        'old_lambdef': pt.Lambda,
        'lambdef_nocond': pt.Lambda,
    }

    self.syntax_errors = []
    self._global_names = []
    self._omit_dedent_list = []
    self._indent_counter = 0
    self._last_failed_start_pos = (0, 0)

    # TODO do print absolute import detection here.
    #try:
    #    del python_grammar_no_print_statement.keywords["print"]
    #except KeyError:
    #    pass  # Doesn't exist in the Python 3 grammar.

    #if self.options["print_function"]:
    #    python_grammar = pygram.python_grammar_no_print_statement
    #else:

    self._used_names = {}
    self._scope_names_stack = [{}]
    self._error_statement_stacks = []

    added_newline = False
    # The Python grammar needs a newline at the end of each statement.
    if not source.endswith('\n'):
        source += '\n'
        added_newline = True

    # For the fast parser.
    self.position_modifier = pt.PositionModifier()

    p = PgenParser(grammar, self.convert_node, self.convert_leaf,
                   self.error_recovery)
    tokenizer = tokenizer or tokenize.source_tokens(source)
    self.module = p.parse(self._tokenize(tokenizer))
    if self.module.type != 'file_input':
        # If there's only one statement, we get back a non-module. That's
        # not what we want, we want a module, so we add it here:
        self.module = self.convert_node(grammar,
                                        grammar.symbol2number['file_input'],
                                        [self.module])

    if added_newline:
        self.remove_last_newline()
    self.module.used_names = self._used_names
    self.module.path = module_path
    self.module.global_names = self._global_names
    self.module.error_statement_stacks = self._error_statement_stacks
class Parser(object):
    AST_MAPPING = {
        'expr_stmt': pt.ExprStmt,
        'classdef': pt.Class,
        'funcdef': pt.Function,
        'file_input': pt.Module,
        'import_name': pt.ImportName,
        'import_from': pt.ImportFrom,
        'break_stmt': pt.KeywordStatement,
        'continue_stmt': pt.KeywordStatement,
        'return_stmt': pt.ReturnStmt,
        'raise_stmt': pt.KeywordStatement,
        'yield_expr': pt.YieldExpr,
        'del_stmt': pt.KeywordStatement,
        'pass_stmt': pt.KeywordStatement,
        'global_stmt': pt.GlobalStmt,
        'nonlocal_stmt': pt.KeywordStatement,
        'print_stmt': pt.KeywordStatement,
        'assert_stmt': pt.AssertStmt,
        'if_stmt': pt.IfStmt,
        'with_stmt': pt.WithStmt,
        'for_stmt': pt.ForStmt,
        'while_stmt': pt.WhileStmt,
        'try_stmt': pt.TryStmt,
        'comp_for': pt.CompFor,
        'decorator': pt.Decorator,
        'lambdef': pt.Lambda,
        'old_lambdef': pt.Lambda,
        'lambdef_nocond': pt.Lambda,
    }

    def __init__(self, grammar, source, start_symbol='file_input',
                 tokenizer=None, start_parsing=True):
        # TODO Remove start_parsing (with False)
        self._used_names = {}
        self.source = source
        self._added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n') and start_symbol == 'file_input':
            source += '\n'
            self._added_newline = True

        self._start_symbol = start_symbol
        self._grammar = grammar

        self._parsed = None

        if start_parsing:
            if tokenizer is None:
                tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
            self.parse(tokenizer)

    def parse(self, tokenizer):
        if self._parsed is not None:
            return self._parsed

        start_number = self._grammar.symbol2number[self._start_symbol]
        self.pgen_parser = PgenParser(
            self._grammar, self.convert_node, self.convert_leaf,
            self.error_recovery, start_number
        )

        self._parsed = self.pgen_parser.parse(tokenizer)

        if self._start_symbol == 'file_input' != self._parsed.type:
            # If there's only one statement, we get back a non-module. That's
            # not what we want, we want a module, so we add it here:
            self._parsed = self.convert_node(
                self._grammar, self._grammar.symbol2number['file_input'],
                [self._parsed])

        if self._added_newline:
            self.remove_last_newline()
        # The stack is empty now, we don't need it anymore.
        del self.pgen_parser
        return self._parsed

    def get_parsed_node(self):
        # TODO remove in favor of get_root_node
        return self._parsed

    def get_root_node(self):
        return self._parsed

    def error_recovery(self, grammar, stack, arcs, typ, value, start_pos,
                       prefix, add_token_callback):
        raise ParseError

    def convert_node(self, grammar, type, children):
        """
        Convert raw node information to a Node instance.

        This is passed to the parser driver which calls it whenever a
        reduction of a grammar rule produces a new complete node, so that
        the tree is built strictly bottom-up.
        """
        symbol = grammar.number2symbol[type]
        try:
            return Parser.AST_MAPPING[symbol](children)
        except KeyError:
            if symbol == 'suite':
                # We don't want the INDENT/DEDENT in our parser tree. Those
                # leaves are just cancer. They are virtual leaves and not real
                # ones and therefore have pseudo start/end positions and no
                # prefixes. Just ignore them.
                children = [children[0]] + children[2:-1]
            return pt.Node(symbol, children)

    def convert_leaf(self, grammar, type, value, prefix, start_pos):
        # print('leaf', repr(value), token.tok_name[type])
        if type == tokenize.NAME:
            if value in grammar.keywords:
                return pt.Keyword(value, start_pos, prefix)
            else:
                name = pt.Name(value, start_pos, prefix)
                # Keep a listing of all used names
                arr = self._used_names.setdefault(name.value, [])
                arr.append(name)
                return name
        elif type == STRING:
            return pt.String(value, start_pos, prefix)
        elif type == NUMBER:
            return pt.Number(value, start_pos, prefix)
        elif type == NEWLINE:
            return pt.Newline(value, start_pos, prefix)
        elif type == ENDMARKER:
            return pt.EndMarker(value, start_pos, prefix)
        else:
            return pt.Operator(value, start_pos, prefix)

    def remove_last_newline(self):
        endmarker = self._parsed.children[-1]
        # The newline is either in the endmarker as a prefix or the previous
        # leaf as a newline token.
        prefix = endmarker.prefix
        if prefix.endswith('\n'):
            endmarker.prefix = prefix = prefix[:-1]
            last_end = 0
            if '\n' not in prefix:
                # Basically if the last line doesn't end with a newline, we
                # have to add the previous line's end_position.
                try:
                    last_end = endmarker.get_previous_leaf().end_pos[1]
                except IndexError:
                    pass
            last_line = re.sub('.*\n', '', prefix)
            endmarker.start_pos = endmarker.line - 1, last_end + len(last_line)
        else:
            try:
                newline = endmarker.get_previous_leaf()
            except IndexError:
                return  # This means that the parser is empty.
            assert newline.value.endswith('\n')
            newline.value = newline.value[:-1]
            endmarker.start_pos = \
                newline.start_pos[0], newline.start_pos[1] + len(newline.value)
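# Usage sketch (not part of the original snippets): how the Parser class above
# is typically driven. load_grammar() is an assumed helper here; its import
# path differs between jedi versions.
from jedi.parser import load_grammar

grammar = load_grammar()
# For 'file_input' a trailing newline is appended internally, then removed
# again from the finished tree by remove_last_newline().
parser = Parser(grammar, "x = 1")
module = parser.get_root_node()
# Even for a single statement the root is a 'file_input' node, because
# parse() wraps non-module results via convert_node().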