def _parse_empty_line(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Optional[EmptyLine]:
    """Speculatively parse one empty line (indent, whitespace, optional comment,
    and a terminating newline).

    Returns the parsed ``EmptyLine`` and advances ``state`` past it, or returns
    ``None`` without consuming any input when the current position is not the
    start of an empty line.
    """
    # Work on a scratch copy of the position so that a failed attempt leaves
    # the caller's state untouched.
    scratch = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    try:
        indent = _parse_indent(
            config, scratch, override_absolute_indent=override_absolute_indent
        )
    except Exception:
        # We aren't at the start of a new line; speculative parsing failed.
        return None
    whitespace = parse_simple_whitespace(config, scratch)
    comment = _parse_comment(config, scratch)
    newline = _parse_newline(config, scratch)
    if newline is None:
        # No terminating newline, so this wasn't an empty line after all.
        return None
    # Success: commit the advanced position back to the caller's state.
    # absolute_indent/is_parenthesized don't change here, so no need to copy.
    state.line = scratch.line
    state.column = scratch.column
    return EmptyLine(indent, whitespace, comment, newline)
def parse_empty_lines(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Sequence[EmptyLine]:
    """Parse a maximal run of consecutive empty lines starting at ``state``.

    When ``override_absolute_indent`` is provided we keep only lines up to and
    including the last one indented at our level: those belong to the current
    node's footer, while any unindented lines after that point belong to the
    next node's leading_lines and must not be consumed here.
    """
    candidate_state = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    parsed: List[Tuple[State, EmptyLine]] = []
    while True:
        empty_line = _parse_empty_line(
            config, candidate_state, override_absolute_indent=override_absolute_indent
        )
        if empty_line is None:
            break
        # Record the state as advanced by this element, then fork a fresh
        # state clone for the next element.
        parsed.append((candidate_state, empty_line))
        candidate_state = State(
            candidate_state.line,
            candidate_state.column,
            state.absolute_indent,
            state.is_parenthesized,
        )

    if override_absolute_indent is not None:
        # Find the last indented line and drop everything after it; if no
        # line was indented, none of them belong to this node.
        last_indented = next(
            (i for i in range(len(parsed) - 1, -1, -1) if parsed[i][1].indent),
            None,
        )
        parsed = [] if last_indented is None else parsed[: last_indented + 1]

    if parsed:
        # Advance the caller's state to just past the last line we kept.
        end_state: State = parsed[-1][0]
        state.line = end_state.line
        state.column = end_state.column

    return [empty_line for _, empty_line in parsed]
def _parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[TrailingWhitespace]:
    """Speculatively parse trailing whitespace: simple whitespace, an optional
    comment, and a terminating newline.

    Returns the parsed ``TrailingWhitespace`` and advances ``state``, or
    returns ``None`` without consuming any input when no newline follows.
    """
    # Parse against a scratch copy so a failure leaves `state` untouched.
    scratch = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    whitespace = parse_simple_whitespace(config, scratch)
    comment = _parse_comment(config, scratch)
    newline = _parse_newline(config, scratch)
    if newline is None:
        # No newline means speculative parsing failed.
        return None
    # Success: commit the advanced position. absolute_indent/is_parenthesized
    # don't change here, so they aren't copied.
    state.line = scratch.line
    state.column = scratch.column
    return TrailingWhitespace(whitespace, comment, newline)
# NOTE(review): the field(...) defaults imply this class is decorated with
# @dataclass — the decorator is outside this chunk; confirm it is present.
class _TokenizeState:
    # Full source split into lines; used for error reporting and for reading
    # indentation text by (line, column) position.
    lines: Sequence[str]
    # Whitespace state left over from the previously converted token; consumed
    # as `whitespace_before` for the next token. Starts at line 1, column 0,
    # unindented, outside any parentheses.
    previous_whitespace_state: WhitespaceState = field(
        default_factory=lambda: WhitespaceState(
            line=1, column=0, absolute_indent="", is_parenthesized=False))
    # Stack of absolute indentation strings; the root level is "".
    indents: List[str] = field(default_factory=lambda: [""])
    # Tracks nesting of parentheses and f-strings so the whitespace parser
    # knows whether newlines are currently allowed.
    parenthesis_or_fstring_stack: List[
        _ParenthesisOrFStringStackEntry] = field(default_factory=list)
def parse_simple_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> SimpleWhitespace:
    """Parse a run of simple (intra-line) whitespace at the current position,
    following backslash line-continuations onto subsequent lines.

    Advances ``state`` and always succeeds, possibly with an empty result.
    """
    source_lines = config.lines
    # The regex can match the empty string, so .match() never returns None.
    # pyre-fixme[16]: Optional type has no attribute `group`.
    fragment = SIMPLE_WHITESPACE_RE.match(
        source_lines[state.line - 1], state.column
    ).group(0)
    fragments = [fragment]
    while "\\" in fragment:
        # A continuation character ends the physical line; resume parsing
        # whitespace at the start of the next one.
        state.line += 1
        state.column = 0
        fragment = SIMPLE_WHITESPACE_RE.match(
            source_lines[state.line - 1], state.column
        ).group(0)
        fragments.append(fragment)
    # TODO: we could special-case the common case where there's no
    # continuation character to avoid list construction and joining.
    # Only the final fragment advances the column on the current line.
    state.column += len(fragment)
    return SimpleWhitespace("".join(fragments))
def _parse_newline(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Newline]:
    """Speculatively parse a newline at the current position.

    Returns a ``Newline`` and advances ``state`` (moving to the next line when
    one exists), or returns ``None`` without consuming input when the current
    position is not a newline.
    """
    current_line = config.lines[state.line - 1]
    newline_match = NEWLINE_RE.match(current_line, state.column)
    if newline_match is None:
        # No newline found; speculative parsing failed.
        return None
    newline_str = newline_match.group(0)
    state.column += len(newline_str)
    if state.column != len(current_line):
        raise Exception(
            "Internal Error: Found a newline, but it wasn't the EOL.")
    if state.line < len(config.lines):
        # This newline ended the line and another line follows, so move to
        # the start of the next line.
        state.line += 1
        state.column = 0
    # When the newline matches the module's default, return an implicit
    # Newline() so it's inherited from the Module rather than stored.
    return Newline() if newline_str == config.default_newline else Newline(newline_str)
def _convert_token(  # noqa: C901: too complex
    state: _TokenizeState, curr_token: OrigToken, next_token: Optional[OrigToken]
) -> Token:
    """Convert one tokenizer token into a ``Token``, threading whitespace state.

    Mutates ``state``: pushes/pops ``indents`` based on the *next* token,
    maintains the parenthesis/f-string stack, and stores the computed
    ``whitespace_after`` as ``previous_whitespace_state`` for the next call.

    Raises ``ParserSyntaxError`` on error tokens, inconsistent dedents, or an
    unmatched closing brace.
    """
    ct_type = curr_token.type
    ct_string = curr_token.string
    ct_start_pos = curr_token.start_pos
    # Error tokens are surfaced immediately as syntax errors with position info.
    if ct_type is _ERRORTOKEN:
        raise ParserSyntaxError(
            f"{ct_string!r} is not a valid token.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )
    if ct_type is _ERROR_DEDENT:
        raise ParserSyntaxError(
            "Inconsistent indentation. Expected a dedent.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )

    # Compute relative indent changes for indent/dedent nodes: the suffix of
    # the new indent beyond the previous indent level.
    relative_indent: Optional[str] = None
    if ct_type is _INDENT:
        old_indent = "" if len(state.indents) < 2 else state.indents[-2]
        new_indent = state.indents[-1]
        relative_indent = new_indent[len(old_indent):]

    # Maintain the indent stack one token ahead: an upcoming INDENT pushes the
    # leading text of its line; an upcoming DEDENT pops.
    if next_token is not None:
        nt_type = next_token.type
        if nt_type is _INDENT:
            nt_line, nt_column = next_token.start_pos
            state.indents.append(state.lines[nt_line - 1][:nt_column])
        elif nt_type is _DEDENT:
            state.indents.pop()

    whitespace_before = state.previous_whitespace_state

    if ct_type is _INDENT or ct_type is _DEDENT or ct_type is _ENDMARKER:
        # Don't update whitespace state for these dummy tokens. This makes it possible
        # to partially parse whitespace for IndentedBlock footers, and then parse the
        # rest of the whitespace in the following statement's leading_lines.
        # Unfortunately, that means that the indentation is either wrong for the footer
        # comments, or for the next line. We've chosen to allow it to be wrong for the
        # IndentedBlock footer and manually override the state when parsing whitespace
        # in that particular node.
        whitespace_after = whitespace_before
        ct_end_pos = ct_start_pos
    else:
        # Not a dummy token, so update the whitespace state.

        # Compute our own end_pos, since parso's end_pos is wrong for triple-strings.
        lines = split_lines(ct_string)
        if len(lines) > 1:
            # Multi-line token: end line advances; end column is the length of
            # the final fragment.
            ct_end_pos = ct_start_pos[0] + len(lines) - 1, len(lines[-1])
        else:
            ct_end_pos = (ct_start_pos[0], ct_start_pos[1] + len(ct_string))

        # Figure out what mode the whitespace parser should use. If we're inside
        # parentheses, certain whitespace (e.g. newlines) are allowed where they would
        # otherwise not be. f-strings override and disable this behavior, however.
        #
        # Parso's tokenizer tracks this internally, but doesn't expose it, so we have to
        # duplicate that logic here.
        pof_stack = state.parenthesis_or_fstring_stack
        try:
            if ct_type is _FSTRING_START:
                pof_stack.append(_FSTRING_STACK_ENTRY)
            elif ct_type is _FSTRING_END:
                pof_stack.pop()
            elif ct_type is _OP:
                if ct_string in "([{":
                    pof_stack.append(_PARENTHESIS_STACK_ENTRY)
                elif ct_string in ")]}":
                    pof_stack.pop()
        except IndexError:
            # pof_stack may be empty by the time we need to read from it due to
            # mismatched braces.
            raise ParserSyntaxError(
                "Encountered a closing brace without a matching opening brace.",
                lines=state.lines,
                raw_line=ct_start_pos[0],
                raw_column=ct_start_pos[1],
            )
        # Parenthesized only if the innermost open construct is a parenthesis,
        # not an f-string.
        is_parenthesized = (
            len(pof_stack) > 0 and pof_stack[-1] == _PARENTHESIS_STACK_ENTRY)
        whitespace_after = WhitespaceState(
            ct_end_pos[0], ct_end_pos[1], state.indents[-1], is_parenthesized)

    # Hold onto whitespace_after, so we can use it as whitespace_before in the next
    # node.
    state.previous_whitespace_state = whitespace_after

    return Token(
        ct_type,
        ct_string,
        ct_start_pos,
        ct_end_pos,
        whitespace_before,
        whitespace_after,
        relative_indent,
    )