Exemple #1
0
    def error_recovery(self, token):
        """Handle a token that the current DFA state cannot accept.

        The method proceeds in three stages:

        1. When parsing a ``file_input`` and the offending token is an
           ``ENDMARKER`` (or a ``DEDENT`` that follows a leaf whose value does
           not end with a line break), a missing trailing newline after a
           ``simple_stmt`` is tolerated: the DFA is advanced as if the
           ``NEWLINE`` had been seen, the token is added and the method
           returns.
        2. If error recovery is disabled (``self._error_recovery`` is falsy)
           the call is delegated to the parent class and its result returned.
        3. Otherwise the stack index of the innermost ``file_input`` or
           non-trivial ``suite`` is computed and ``self._stack_removal`` is
           asked to discard what sits above it.  If that reports success the
           token is added normally; if not, the token is stored as a
           ``PythonErrorLeaf`` on the top stack node.

        Finally, if the top of the stack is a ``suite``, its DFA is moved
        along the ``'stmt'`` arc when such an arc exists.

        :param token: The token that could not be consumed.  It is read via
            ``token.type`` and unpacked as
            ``(type, value, start_pos, prefix)``.
        """
        tos_nodes = self.stack[-1].nodes
        # ``None`` means the top stack node has no children yet.
        last_leaf = tos_nodes[-1].get_last_leaf() if tos_nodes else None

        if self._start_nonterminal == 'file_input' and (
                token.type == PythonTokenTypes.ENDMARKER
                or (token.type == DEDENT
                    # Without a previous leaf there is no statement that
                    # could be missing its newline; guarding here avoids an
                    # AttributeError on ``None.value``.
                    and last_leaf is not None
                    and not last_leaf.value.endswith(('\n', '\r')))):
            # In Python statements need to end with a newline. But since it's
            # possible (and valid in Python) that there's no newline at the
            # end of a file, we have to recover even if the user doesn't want
            # error recovery.
            if self.stack[-1].dfa.from_rule == 'simple_stmt':
                try:
                    plan = self.stack[-1].dfa.transitions[
                        PythonTokenTypes.NEWLINE]
                except KeyError:
                    # No NEWLINE transition from this state; fall through to
                    # the regular handling below.
                    pass
                else:
                    if plan.next_dfa.is_final and not plan.dfa_pushes:
                        # We are ignoring here that the newline would be
                        # required for a simple_stmt.
                        self.stack[-1].dfa = plan.next_dfa
                        self._add_token(token)
                        return

        if not self._error_recovery:
            return super().error_recovery(token)

        def current_suite(stack):
            # For now just discard everything that is not a suite or
            # file_input, if we detect an error.  Walks the stack from the
            # top down and returns the index of the first matching node (or
            # of the bottom node if nothing matched).
            for until_index, stack_node in reversed(list(enumerate(stack))):
                # `suite` can sometimes be only simple_stmt, not stmt.
                if stack_node.nonterminal == 'file_input':
                    break
                elif stack_node.nonterminal == 'suite':
                    # In the case where we just have a newline we don't want to
                    # do error recovery here. In all other cases, we want to do
                    # error recovery.
                    if len(stack_node.nodes) != 1:
                        break
            return until_index

        until_index = current_suite(self.stack)

        if self._stack_removal(until_index + 1):
            self._add_token(token)
        else:
            typ, value, start_pos, prefix = token
            if typ == INDENT:
                # For every deleted INDENT we have to delete a DEDENT as well.
                # Otherwise the parser will get into trouble and DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)

            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos,
                                              prefix)
            self.stack[-1].nodes.append(error_leaf)

        tos = self.stack[-1]
        if tos.nonterminal == 'suite':
            # Need at least one statement in the suite. This happened with the
            # error recovery above.
            try:
                tos.dfa = tos.dfa.arcs['stmt']
            except KeyError:
                # We're already in a final state.
                pass
Exemple #2
0
    def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos,
                       prefix, add_token_callback):
        """Handle a token that the pgen parser could not shift.

        The method proceeds in three stages:

        1. When parsing a ``file_input`` and the offending token is an
           ``ENDMARKER`` (or a ``DEDENT`` that follows a leaf whose value
           contains no ``'\\n'``), a missing trailing newline after a
           ``simple_stmt`` is tolerated: the ``NEWLINE`` arc is taken
           manually, finished stack entries are popped, the token is handed
           to ``add_token_callback`` and the method returns.
        2. If error recovery is disabled (``self._error_recovery`` is falsy)
           the call is delegated to the parent class and its result returned.
        3. Otherwise the stack index of the innermost ``file_input`` or
           ``suite`` with more than one node is computed and
           ``self._stack_removal`` is asked to discard what sits above it.
           If that reports success the token is added through
           ``add_token_callback``; if not, the token is stored as a
           ``PythonErrorLeaf`` in the node list of the top stack entry.

        Finally, if the symbol found in stage 3 is ``suite`` and the only arc
        out of the current state is labelled ``stmt``, that arc is taken.

        :param pgen_grammar: Grammar object; ``number2symbol`` and
            ``symbol2label`` are read from it.
        :param stack: Parser stack of ``(dfa, state, (type, nodes))`` entries.
        :param arcs: Arcs of the current state.  NOTE: this name is rebound
            to ``states[state]`` of the top stack entry when stage 1 is
            entered without returning, and that value is what gets forwarded
            to the parent class and to ``_stack_removal``.
        :param typ: Type of the offending token.
        :param value: String value of the offending token.
        :param start_pos: Start position of the offending token.
        :param prefix: Prefix of the offending token.
        :param add_token_callback: Callable taking
            ``(typ, value, start_pos, prefix)`` used to feed a token back
            into the parser.
        """
        def get_symbol_and_nodes(stack):
            # Yield ``(symbol name, nodes)`` for each stack entry, bottom up.
            for dfa, state, (type_, nodes) in stack:
                yield pgen_grammar.number2symbol[type_], nodes

        tos_nodes = stack.get_tos_nodes()
        # ``None`` means the top stack entry has no nodes yet.
        last_leaf = tos_nodes[-1].get_last_leaf() if tos_nodes else None

        if self._start_symbol == 'file_input' and (
                typ == ENDMARKER
                or (typ == DEDENT
                    # Without a previous leaf there is no statement that
                    # could be missing its newline; guarding here avoids an
                    # AttributeError on ``None.value``.
                    and last_leaf is not None
                    and '\n' not in last_leaf.value)):

            def reduce_stack(states, newstate):
                # Pop stack entries for as long as the current state's only
                # arc is the ``(0, state)`` one.
                state = newstate
                while states[state] == [(0, state)]:
                    self.pgen_parser._pop()

                    dfa, state, (type_, nodes) = stack[-1]
                    states, _first = dfa

            # In Python statements need to end with a newline. But since it's
            # possible (and valid in Python) that there's no newline at the
            # end of a file, we have to recover even if the user doesn't want
            # error recovery.
            ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)

            dfa, state, (type_, nodes) = stack[-1]
            symbol = pgen_grammar.number2symbol[type_]
            states, _first = dfa
            # Deliberately rebinds the ``arcs`` parameter (see docstring).
            arcs = states[state]
            # Look for an arc labelled with NEWLINE.
            for label, newstate in arcs:
                if label == ilabel:
                    if symbol == 'simple_stmt':
                        # This is basically shifting
                        stack[-1] = (dfa, newstate, (type_, nodes))

                        reduce_stack(states, newstate)
                        add_token_callback(typ, value, start_pos, prefix)
                        return
                    break

        if not self._error_recovery:
            return super(Parser,
                         self).error_recovery(pgen_grammar, stack, arcs, typ,
                                              value, start_pos, prefix,
                                              add_token_callback)

        def current_suite(stack):
            # For now just discard everything that is not a suite or
            # file_input, if we detect an error.  Walks the stack from the
            # top down and returns the index and symbol of the first match
            # (or of the bottom entry if nothing matched).
            for index, (symbol, nodes) in reversed(
                    list(enumerate(get_symbol_and_nodes(stack)))):
                # `suite` can sometimes be only simple_stmt, not stmt.
                if symbol == 'file_input':
                    break
                elif symbol == 'suite' and len(nodes) > 1:
                    # suites without an indent in them get discarded.
                    break
            return index, symbol

        index, symbol = current_suite(stack)

        if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value,
                               start_pos):
            add_token_callback(typ, value, start_pos, prefix)
        else:
            if typ == INDENT:
                # For every deleted INDENT we have to delete a DEDENT as well.
                # Otherwise the parser will get into trouble and DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)

            error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value,
                                              start_pos, prefix)
            # stack[-1] is (dfa, state, (type, nodes)); append to ``nodes``.
            stack[-1][2][1].append(error_leaf)

        if symbol == 'suite':
            dfa, state, node = stack[-1]
            states, _first = dfa
            arcs = states[state]
            intended_label = pgen_grammar.symbol2label['stmt']
            # Introduce a proper state transition. We're basically allowing
            # there to be no valid statements inside a suite.
            if [arc[0] for arc in arcs] == [intended_label]:
                new_state = arcs[0][1]
                stack[-1] = dfa, new_state, node