Example #1
    def parse(self, tokens):
        start_number = self._grammar.symbol2number[self._start_symbol]
        self.pgen_parser = PgenParser(
            self._grammar, self.convert_node, self.convert_leaf,
            self.error_recovery, start_number
        )

        node = self.pgen_parser.parse(tokens)
        # The stack is empty now, we don't need it anymore.
        del self.pgen_parser
        return node
Example #2
    def parse(self, tokenizer):
        if self._parsed is not None:
            return self._parsed

        start_number = self._grammar.symbol2number[self._start_symbol]
        pgen_parser = PgenParser(self._grammar, self.convert_node,
                                 self.convert_leaf, self.error_recovery,
                                 start_number)

        try:
            self._parsed = pgen_parser.parse(tokenizer)
        finally:
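            # Preserve the pgen stack even when parsing raises, so it can
            # still be inspected afterwards.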
            self.stack = pgen_parser.stack

        if self._start_symbol == 'file_input' != self._parsed.type:
            # If there's only one statement, we get back a non-module. That's
            # not what we want, we want a module, so we add it here:
            self._parsed = self.convert_node(
                self._grammar, self._grammar.symbol2number['file_input'],
                [self._parsed])

        if self._added_newline:
            self.remove_last_newline()
        return self._parsed
Example #3
class BaseParser(object):
    node_map = {}
    default_node = tree.Node

    leaf_map = {}
    default_leaf = tree.Leaf

    def __init__(self, grammar, start_symbol='file_input', error_recovery=False):
        self._grammar = grammar
        self._start_symbol = start_symbol
        self._error_recovery = error_recovery

    def parse(self, tokens):
        start_number = self._grammar.symbol2number[self._start_symbol]
        self.pgen_parser = PgenParser(
            self._grammar, self.convert_node, self.convert_leaf,
            self.error_recovery, start_number
        )

        node = self.pgen_parser.parse(tokens)
        # The stack is empty now, we don't need it anymore.
        del self.pgen_parser
        return node

    def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
                       add_token_callback):
        if self._error_recovery:
            raise NotImplementedError("Error Recovery is not implemented")
        else:
            raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)

    def convert_node(self, grammar, type_, children):
        # TODO REMOVE symbol, we don't want type here.
        symbol = grammar.number2symbol[type_]
        try:
            return self.node_map[symbol](children)
        except KeyError:
            return self.default_node(symbol, children)

    def convert_leaf(self, grammar, type_, value, prefix, start_pos):
        try:
            return self.leaf_map[type_](value, start_pos, prefix)
        except KeyError:
            return self.default_leaf(value, start_pos, prefix)
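
The node_map and leaf_map class attributes are dispatch tables: convert_node looks up the grammar rule name and convert_leaf the token type, and both fall back to default_node/default_leaf on a miss. A subclass can therefore swap in its own tree classes without overriding either method. A minimal sketch, reusing the tree module from the example above (the FunctionNode class is hypothetical, not part of the original code):

class FunctionNode(tree.Node):
    # convert_node calls node_map[symbol](children), passing only the
    # children; the symbol is fixed by the mapping entry.
    def __init__(self, children):
        super(FunctionNode, self).__init__('funcdef', children)

class MyParser(BaseParser):
    # Every 'funcdef' reduction now builds a FunctionNode; all other
    # rules still fall back to default_node (tree.Node).
    node_map = {'funcdef': FunctionNode}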
Example #4
    def __init__(self, grammar, source, module_path=None, tokenizer=None):
        self._ast_mapping = {
            'expr_stmt': pt.ExprStmt,
            'classdef': pt.Class,
            'funcdef': pt.Function,
            'file_input': pt.Module,
            'import_name': pt.ImportName,
            'import_from': pt.ImportFrom,
            'break_stmt': pt.KeywordStatement,
            'continue_stmt': pt.KeywordStatement,
            'return_stmt': pt.ReturnStmt,
            'raise_stmt': pt.KeywordStatement,
            'yield_expr': pt.YieldExpr,
            'del_stmt': pt.KeywordStatement,
            'pass_stmt': pt.KeywordStatement,
            'global_stmt': pt.GlobalStmt,
            'nonlocal_stmt': pt.KeywordStatement,
            'assert_stmt': pt.AssertStmt,
            'if_stmt': pt.IfStmt,
            'with_stmt': pt.WithStmt,
            'for_stmt': pt.ForStmt,
            'while_stmt': pt.WhileStmt,
            'try_stmt': pt.TryStmt,
            'comp_for': pt.CompFor,
            'decorator': pt.Decorator,
            'lambdef': pt.Lambda,
            'old_lambdef': pt.Lambda,
            'lambdef_nocond': pt.Lambda,
        }

        self.syntax_errors = []

        self._global_names = []
        self._omit_dedent_list = []
        self._indent_counter = 0
        self._last_failed_start_pos = (0, 0)

        # TODO do print absolute import detection here.
        #try:
        #    del python_grammar_no_print_statement.keywords["print"]
        #except KeyError:
        #    pass  # Doesn't exist in the Python 3 grammar.

        #if self.options["print_function"]:
        #    python_grammar = pygram.python_grammar_no_print_statement
        #else:
        self._used_names = {}
        self._scope_names_stack = [{}]
        self._error_statement_stacks = []

        added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n'):
            source += '\n'
            added_newline = True

        # For the fast parser.
        self.position_modifier = pt.PositionModifier()
        p = PgenParser(grammar, self.convert_node, self.convert_leaf,
                       self.error_recovery)
        tokenizer = tokenizer or tokenize.source_tokens(source)
        self.module = p.parse(self._tokenize(tokenizer))
        if self.module.type != 'file_input':
            # If there's only one statement, we get back a non-module. That's
            # not what we want, we want a module, so we add it here:
            self.module = self.convert_node(grammar,
                                            grammar.symbol2number['file_input'],
                                            [self.module])

        if added_newline:
            self.remove_last_newline()
        self.module.used_names = self._used_names
        self.module.path = module_path
        self.module.global_names = self._global_names
        self.module.error_statement_stacks = self._error_statement_stacks
Example #5
class Parser(object):
    AST_MAPPING = {
        'expr_stmt': pt.ExprStmt,
        'classdef': pt.Class,
        'funcdef': pt.Function,
        'file_input': pt.Module,
        'import_name': pt.ImportName,
        'import_from': pt.ImportFrom,
        'break_stmt': pt.KeywordStatement,
        'continue_stmt': pt.KeywordStatement,
        'return_stmt': pt.ReturnStmt,
        'raise_stmt': pt.KeywordStatement,
        'yield_expr': pt.YieldExpr,
        'del_stmt': pt.KeywordStatement,
        'pass_stmt': pt.KeywordStatement,
        'global_stmt': pt.GlobalStmt,
        'nonlocal_stmt': pt.KeywordStatement,
        'print_stmt': pt.KeywordStatement,
        'assert_stmt': pt.AssertStmt,
        'if_stmt': pt.IfStmt,
        'with_stmt': pt.WithStmt,
        'for_stmt': pt.ForStmt,
        'while_stmt': pt.WhileStmt,
        'try_stmt': pt.TryStmt,
        'comp_for': pt.CompFor,
        'decorator': pt.Decorator,
        'lambdef': pt.Lambda,
        'old_lambdef': pt.Lambda,
        'lambdef_nocond': pt.Lambda,
    }

    def __init__(self, grammar, source, start_symbol='file_input',
                 tokenizer=None, start_parsing=True):
        # Todo Remove start_parsing (with False)

        self._used_names = {}

        self.source = source
        self._added_newline = False
        # The Python grammar needs a newline at the end of each statement.
        if not source.endswith('\n') and start_symbol == 'file_input':
            source += '\n'
            self._added_newline = True

        self._start_symbol = start_symbol
        self._grammar = grammar

        self._parsed = None

        if start_parsing:
            if tokenizer is None:
                tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
            self.parse(tokenizer)

    def parse(self, tokenizer):
        if self._parsed is not None:
            return self._parsed

        start_number = self._grammar.symbol2number[self._start_symbol]
        self.pgen_parser = PgenParser(
            self._grammar, self.convert_node, self.convert_leaf,
            self.error_recovery, start_number
        )

        self._parsed = self.pgen_parser.parse(tokenizer)

        if self._start_symbol == 'file_input' != self._parsed.type:
            # If there's only one statement, we get back a non-module. That's
            # not what we want, we want a module, so we add it here:
            self._parsed = self.convert_node(self._grammar,
                                             self._grammar.symbol2number['file_input'],
                                             [self._parsed])

        if self._added_newline:
            self.remove_last_newline()
        # The stack is empty now, we don't need it anymore.
        del self.pgen_parser
        return self._parsed

    def get_parsed_node(self):
        # TODO remove in favor of get_root_node
        return self._parsed

    def get_root_node(self):
        return self._parsed

    def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
                       add_token_callback):
        raise ParseError

    def convert_node(self, grammar, type, children):
        """
        Convert raw node information to a Node instance.

        This is passed to the parser driver, which calls it whenever a
        reduction of a grammar rule produces a new complete node, so that
        the tree is built strictly bottom-up.
        """
        symbol = grammar.number2symbol[type]
        try:
            return Parser.AST_MAPPING[symbol](children)
        except KeyError:
            if symbol == 'suite':
                # We don't want the INDENT/DEDENT in our parser tree. Those
                # leaves are just cancer. They are virtual leaves and not real
                # ones and therefore have pseudo start/end positions and no
                # prefixes. Just ignore them.
                children = [children[0]] + children[2:-1]
            return pt.Node(symbol, children)

    def convert_leaf(self, grammar, type, value, prefix, start_pos):
        # print('leaf', repr(value), token.tok_name[type])
        if type == tokenize.NAME:
            if value in grammar.keywords:
                return pt.Keyword(value, start_pos, prefix)
            else:
                name = pt.Name(value, start_pos, prefix)
                # Keep a listing of all used names
                arr = self._used_names.setdefault(name.value, [])
                arr.append(name)
                return name
        elif type == STRING:
            return pt.String(value, start_pos, prefix)
        elif type == NUMBER:
            return pt.Number(value, start_pos, prefix)
        elif type == NEWLINE:
            return pt.Newline(value, start_pos, prefix)
        elif type == ENDMARKER:
            return pt.EndMarker(value, start_pos, prefix)
        else:
            return pt.Operator(value, start_pos, prefix)

    def remove_last_newline(self):
        endmarker = self._parsed.children[-1]
        # The newline is either in the endmarker as a prefix or the previous
        # leaf as a newline token.
        prefix = endmarker.prefix
        if prefix.endswith('\n'):
            endmarker.prefix = prefix = prefix[:-1]
            last_end = 0
            if '\n' not in prefix:
                # Basically, if the last line doesn't end with a newline, we
                # have to add the previous line's end position.
                try:
                    last_end = endmarker.get_previous_leaf().end_pos[1]
                except IndexError:
                    pass
            last_line = re.sub('.*\n', '', prefix)
            endmarker.start_pos = endmarker.line - 1, last_end + len(last_line)
        else:
            try:
                newline = endmarker.get_previous_leaf()
            except IndexError:
                return  # This means that the parser is empty.

            assert newline.value.endswith('\n')
            newline.value = newline.value[:-1]
            endmarker.start_pos = \
                newline.start_pos[0], newline.start_pos[1] + len(newline.value)
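
A brief usage sketch for the Parser class above. The grammar loader import is an assumption (older jedi releases ship one, but the exact path varies between versions); the constructor call and accessor match the example:

# Usage sketch. load_grammar is an assumed import; with start_parsing=True
# (the default), __init__ tokenizes and parses the source immediately.
from jedi.parser import load_grammar  # assumption: path varies by version

grammar = load_grammar()
parser = Parser(grammar, 'x = 1')   # a trailing newline is added automatically
module = parser.get_root_node()     # root node; a pt.Module for 'file_input'
print(module.type)                  # -> 'file_input'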