def error_recovery(self, token):
    """Try to recover from ``token`` instead of failing outright.

    Two strategies are applied, in this order:

    1. When parsing starts at ``file_input`` and the token is an ENDMARKER,
       or a DEDENT whose preceding leaf does not end in a line break, the
       missing NEWLINE of a trailing ``simple_stmt`` is tolerated and the
       token is added normally. This happens even when error recovery is
       switched off.
    2. Otherwise, if error recovery is enabled, the stack is cut back to the
       innermost usable ``suite`` (or ``file_input``). If nothing could be
       removed, the token is stored as an error leaf.

    :param token: The offending token. It is read through ``token.type`` and
        unpacked as ``(type, value, start_pos, prefix)``.
    :return: ``None``, or whatever the parent class' ``error_recovery``
        returns when error recovery is disabled.
    """
    tos_nodes = self.stack[-1].nodes
    last_leaf = tos_nodes[-1].get_last_leaf() if tos_nodes else None

    # `last_leaf` is None when the top stack node has no children yet. In that
    # case there is no leaf that could be missing its newline, so the DEDENT
    # branch must not dereference it.
    if self._start_nonterminal == 'file_input' and (
            token.type == PythonTokenTypes.ENDMARKER
            or token.type == DEDENT
            and last_leaf is not None
            and not last_leaf.value.endswith(('\n', '\r'))):
        # In Python statements need to end with a newline. But since it's
        # possible (and valid in Python) that there's no newline at the
        # end of a file, we have to recover even if the user doesn't want
        # error recovery.
        if self.stack[-1].dfa.from_rule == 'simple_stmt':
            try:
                plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
            except KeyError:
                # No NEWLINE transition from here; fall through to the
                # regular handling below.
                pass
            else:
                if plan.next_dfa.is_final and not plan.dfa_pushes:
                    # We are ignoring here that the newline would be
                    # required for a simple_stmt.
                    self.stack[-1].dfa = plan.next_dfa
                    self._add_token(token)
                    return

    if not self._error_recovery:
        return super().error_recovery(token)

    def current_suite(stack):
        # For now just discard everything that is not a suite or
        # file_input, if we detect an error.
        for until_index, stack_node in reversed(list(enumerate(stack))):
            # `suite` can sometimes be only simple_stmt, not stmt.
            if stack_node.nonterminal == 'file_input':
                break
            elif stack_node.nonterminal == 'suite':
                # In the case where we just have a newline we don't want to
                # do error recovery here. In all other cases, we want to do
                # error recovery.
                if len(stack_node.nodes) != 1:
                    break
        return until_index

    until_index = current_suite(self.stack)

    if self._stack_removal(until_index + 1):
        self._add_token(token)
    else:
        typ, value, start_pos, prefix = token
        if typ == INDENT:
            # For every deleted INDENT we have to delete a DEDENT as well.
            # Otherwise the parser will get into trouble and DEDENT too early.
            self._omit_dedent_list.append(self._indent_counter)

        error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
        self.stack[-1].nodes.append(error_leaf)

    tos = self.stack[-1]
    if tos.nonterminal == 'suite':
        # Need at least one statement in the suite. This happens with the
        # error recovery above.
        try:
            tos.dfa = tos.dfa.arcs['stmt']
        except KeyError:
            # We're already in a final state.
            pass
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
                   add_token_callback):
    """Try to recover from a token instead of failing outright.

    Two strategies are applied, in this order:

    1. When parsing starts at ``file_input`` and the token is an ENDMARKER,
       or a DEDENT whose preceding leaf contains no ``'\\n'``, a NEWLINE is
       shifted implicitly for a trailing ``simple_stmt`` and the token is
       handed to ``add_token_callback``. This happens even when error
       recovery is switched off.
    2. Otherwise, if error recovery is enabled, the stack is cut back to the
       innermost usable ``suite`` (or ``file_input``). If nothing could be
       removed, the token is stored as an error leaf.

    :param pgen_grammar: Grammar object; ``number2symbol`` and
        ``symbol2label`` are read from it.
    :param stack: Parser stack of ``(dfa, state, (type_, nodes))`` entries.
        It is modified in place.
    :param arcs: Arcs forwarded to the parent implementation and to
        ``_stack_removal``.
    :param typ: Type of the offending token.
    :param value: String value of the offending token.
    :param start_pos: Start position of the offending token.
    :param prefix: Prefix of the offending token.
    :param add_token_callback: Called as
        ``add_token_callback(typ, value, start_pos, prefix)`` to add the
        token once the stack is in a state that can take it.
    :return: ``None``, or whatever the parent class' ``error_recovery``
        returns when error recovery is disabled.
    """
    def get_symbol_and_nodes(stack):
        for _dfa, _state, (type_, nodes) in stack:
            yield pgen_grammar.number2symbol[type_], nodes

    tos_nodes = stack.get_tos_nodes()
    last_leaf = tos_nodes[-1].get_last_leaf() if tos_nodes else None

    # `last_leaf` is None when the top stack node has no children yet. In that
    # case there is no leaf that could be missing its newline, so the DEDENT
    # branch must not dereference it.
    if self._start_symbol == 'file_input' and (
            typ == ENDMARKER
            or typ == DEDENT
            and last_leaf is not None
            and '\n' not in last_leaf.value):
        def reduce_stack(states, newstate):
            # Pop for as long as the current state's only arc is `(0, state)`,
            # re-reading the state from the new top of the stack each time.
            state = newstate
            while states[state] == [(0, state)]:
                self.pgen_parser._pop()

                dfa, state, (type_, nodes) = stack[-1]
                states, first = dfa

        # In Python statements need to end with a newline. But since it's
        # possible (and valid in Python) that there's no newline at the
        # end of a file, we have to recover even if the user doesn't want
        # error recovery.
        ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)

        dfa, state, (type_, nodes) = stack[-1]
        symbol = pgen_grammar.number2symbol[type_]
        states, first = dfa
        # NOTE: this deliberately rebinds the `arcs` parameter; the calls
        # further down receive this value when this branch was taken.
        arcs = states[state]
        # Look for a state with this label
        for i, newstate in arcs:
            if ilabel == i:
                if symbol == 'simple_stmt':
                    # This is basically shifting
                    stack[-1] = (dfa, newstate, (type_, nodes))

                    reduce_stack(states, newstate)
                    add_token_callback(typ, value, start_pos, prefix)
                    return
                break

    if not self._error_recovery:
        return super(Parser, self).error_recovery(
            pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
            add_token_callback)

    def current_suite(stack):
        # For now just discard everything that is not a suite or
        # file_input, if we detect an error.
        for index, (symbol, nodes) in reversed(
                list(enumerate(get_symbol_and_nodes(stack)))):
            # `suite` can sometimes be only simple_stmt, not stmt.
            if symbol == 'file_input':
                break
            elif symbol == 'suite' and len(nodes) > 1:
                # suites without an indent in them get discarded.
                break
        return index, symbol, nodes

    index, symbol, nodes = current_suite(stack)

    if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
        add_token_callback(typ, value, start_pos, prefix)
    else:
        if typ == INDENT:
            # For every deleted INDENT we have to delete a DEDENT as well.
            # Otherwise the parser will get into trouble and DEDENT too early.
            self._omit_dedent_list.append(self._indent_counter)

        error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
        stack[-1][2][1].append(error_leaf)

    if symbol == 'suite':
        dfa, state, node = stack[-1]
        states, first = dfa
        arcs = states[state]
        intended_label = pgen_grammar.symbol2label['stmt']
        # Introduce a proper state transition. We're basically allowing
        # there to be no valid statements inside a suite.
        if [x[0] for x in arcs] == [intended_label]:
            new_state = arcs[0][1]
            stack[-1] = dfa, new_state, node