예제 #1
0
def _parse_empty_line(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Optional[EmptyLine]:
    """Speculatively parse a single empty line starting at ``state``.

    An empty line is read as indent, simple whitespace, an optional comment
    and a newline, in that order. All parsing happens on a scratch copy of
    ``state``; the caller's ``state`` only has its ``line``/``column``
    advanced when the whole line was parsed.

    Args:
        config: Parser configuration forwarded to the sub-parsers.
        state: Current position. Mutated only on success.
        override_absolute_indent: Forwarded unchanged to ``_parse_indent``.

    Returns:
        An ``EmptyLine`` on success, or ``None`` when ``_parse_indent`` raises
        or no newline is found.
    """
    # Scratch copy, so a failed attempt leaves the caller's state untouched.
    probe = State(
        state.line,
        state.column,
        state.absolute_indent,
        state.is_parenthesized,
    )

    try:
        indent = _parse_indent(
            config,
            probe,
            override_absolute_indent=override_absolute_indent,
        )
    except Exception:
        # Not at the start of a new line: give up on this attempt.
        return None

    whitespace = parse_simple_whitespace(config, probe)
    comment = _parse_comment(config, probe)
    newline = _parse_newline(config, probe)

    if newline is not None:
        # Commit the position. absolute_indent / is_parenthesized are not
        # copied back because they don't change during this parse.
        state.line, state.column = probe.line, probe.column
        return EmptyLine(indent, whitespace, comment, newline)

    # No terminating newline, so this was not an empty line.
    return None
예제 #2
0
def parse_empty_lines(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Sequence[EmptyLine]:
    """Parse as many consecutive empty lines as possible from ``state``.

    When ``override_absolute_indent`` is not ``None``, only the lines up to
    and including the last one whose ``indent`` is truthy are kept: those
    belong to the footer of the current node, while any later non-indented
    lines are left for the next line's leading_lines. If no parsed line is
    indented, nothing is kept.

    Args:
        config: Parser configuration forwarded to ``_parse_empty_line``.
        state: Current position. Its ``line``/``column`` are advanced to just
            past the last line that is kept, and left untouched when no line
            is kept.
        override_absolute_indent: Forwarded to ``_parse_empty_line``; also
            switches on the trimming described above.

    Returns:
        The kept ``EmptyLine`` nodes, in source order.
    """
    # Each entry pairs a parsed line with the state reached right after it,
    # so we can rewind to any line boundary once we know what is kept.
    parsed: List[Tuple[State, EmptyLine]] = []
    cursor = State(
        state.line,
        state.column,
        state.absolute_indent,
        state.is_parenthesized,
    )
    while True:
        empty_line = _parse_empty_line(
            config,
            cursor,
            override_absolute_indent=override_absolute_indent,
        )
        if empty_line is None:
            break

        parsed.append((cursor, empty_line))
        # Fresh clone for the next attempt; the stored one must not change.
        cursor = State(
            cursor.line,
            cursor.column,
            state.absolute_indent,
            state.is_parenthesized,
        )

    if override_absolute_indent is not None:
        # Walk backwards to the last indented line and cut the list after it.
        keep = 0
        for idx in reversed(range(len(parsed))):
            if parsed[idx][1].indent:
                keep = idx + 1
                break
        parsed = parsed[:keep]

    if parsed:
        # Move the caller's position to the end of the last kept line.
        last_state: State = parsed[-1][0]
        state.line = last_state.line
        state.column = last_state.column
    return [empty_line for _, empty_line in parsed]
예제 #3
0
def _parse_trailing_whitespace(config: BaseWhitespaceParserConfig,
                               state: State) -> Optional[TrailingWhitespace]:
    """Speculatively parse the whitespace, comment and newline ending a line.

    Parsing runs on a scratch copy of ``state``; the caller's ``state`` has
    its ``line``/``column`` advanced only when a newline was found.

    Returns:
        A ``TrailingWhitespace`` on success, otherwise ``None``.
    """
    # Scratch copy, so a failed attempt leaves the caller's state untouched.
    probe = State(
        state.line,
        state.column,
        state.absolute_indent,
        state.is_parenthesized,
    )

    whitespace = parse_simple_whitespace(config, probe)
    comment = _parse_comment(config, probe)
    newline = _parse_newline(config, probe)

    if newline is not None:
        # Commit the position. absolute_indent / is_parenthesized are not
        # copied back because they don't change during this parse.
        state.line, state.column = probe.line, probe.column
        return TrailingWhitespace(whitespace, comment, newline)

    # No newline at this position, so the attempt failed.
    return None
예제 #4
0
# The `field(default_factory=...)` defaults below only take effect when the
# class is processed by `dataclass`; without the decorator the attributes
# would be raw `Field` objects and `lines` could not be passed in at all.
# Imported next to the class so the definition is self-contained.
from dataclasses import dataclass


@dataclass(frozen=False)
class _TokenizeState:
    """Mutable bookkeeping carried from one converted token to the next.

    `_convert_token` reads and updates every field, so the dataclass must
    stay non-frozen.
    """

    # Source lines; indexed with 1-based line numbers minus one and handed to
    # ParserSyntaxError for error reporting.
    lines: Sequence[str]
    # Whitespace state left behind by the previous token; the next token uses
    # it as its `whitespace_before`. Starts at line 1, column 0, no indent.
    previous_whitespace_state: WhitespaceState = field(
        default_factory=lambda: WhitespaceState(
            line=1, column=0, absolute_indent="", is_parenthesized=False))
    # Stack of absolute indentation strings; the bottom entry is always "".
    indents: List[str] = field(default_factory=lambda: [""])
    # Stack of currently open parentheses / f-strings, innermost last.
    parenthesis_or_fstring_stack: List[
        _ParenthesisOrFStringStackEntry] = field(default_factory=list)
예제 #5
0
def parse_simple_whitespace(config: BaseWhitespaceParserConfig,
                            state: State) -> SimpleWhitespace:
    """Consume simple whitespace at ``state``, following line continuations.

    Whitespace is matched on the current line from ``state.column``. While
    the matched text contains a backslash (a line continuation), parsing
    moves to column 0 of the next line and matches again. ``state`` is left
    just past the last matched fragment.

    Returns:
        A ``SimpleWhitespace`` holding all matched fragments concatenated.
    """
    source_lines = config.lines
    fragments = []
    while True:
        # The match never fails because the pattern can match an empty string.
        # pyre-fixme[16]: Optional type has no attribute `group`.
        fragment = SIMPLE_WHITESPACE_RE.match(source_lines[state.line - 1],
                                              state.column).group(0)
        fragments.append(fragment)
        if "\\" not in fragment:
            break
        # Continuation character: carry on at the start of the next line.
        state.line += 1
        state.column = 0

    # TODO: we could special-case the common case where there's no continuation
    # character to avoid list construction and joining.

    # Only the final fragment lies on the current line, so it alone moves the
    # column forward.
    state.column += len(fragment)
    return SimpleWhitespace("".join(fragments))
예제 #6
0
def _parse_newline(config: BaseWhitespaceParserConfig,
                   state: State) -> Optional[Newline]:
    """Try to consume a newline at ``state``.

    On a match, ``state`` moves to column 0 of the next line when there is
    one; on the last line it stays at the end of that line.

    Returns:
        ``None`` when there is no newline at the current position. Otherwise
        a ``Newline``: a bare ``Newline()`` when the matched text equals
        ``config.default_newline``, else ``Newline(<matched text>)``.

    Raises:
        Exception: If the matched newline does not end the current line.
    """
    current_line = config.lines[state.line - 1]
    found = NEWLINE_RE.match(current_line, state.column)
    if found is None:
        # No newline here; the speculative parse failed.
        return None

    newline_text = found.group(0)
    state.column += len(newline_text)
    if state.column != len(current_line):
        raise Exception(
            "Internal Error: Found a newline, but it wasn't the EOL.")

    has_following_line = state.line < len(config.lines)
    if has_following_line:
        # The newline ended this line and another one follows: go to it.
        state.line += 1
        state.column = 0

    if newline_text != config.default_newline:
        return Newline(newline_text)
    # Matches the default, so inherit it from the Module instead of storing
    # it explicitly.
    return Newline()
예제 #7
0
def _convert_token(  # noqa: C901: too complex
        state: _TokenizeState, curr_token: OrigToken,
        next_token: Optional[OrigToken]) -> Token:
    """Convert one tokenizer token into a ``Token`` with whitespace states.

    Args:
        state: Running tokenizer bookkeeping. Its indent stack, its
            parenthesis/f-string stack and ``previous_whitespace_state`` are
            updated in place.
        curr_token: The token being converted.
        next_token: The token after ``curr_token``, or ``None``. Only its
            type and start position are used, to keep the indent stack one
            step ahead.

    Returns:
        A ``Token`` with the original type, string and start position, the
        computed end position, the whitespace states before and after it,
        and, for indent tokens only, the indentation relative to the
        enclosing level.

    Raises:
        ParserSyntaxError: For an error token, an error dedent, or a closing
            token that finds the parenthesis/f-string stack empty.
    """
    ct_kind = curr_token.type
    ct_string = curr_token.string
    ct_begin = curr_token.start_pos

    if ct_kind is _ERRORTOKEN:
        raise ParserSyntaxError(
            f"{ct_string!r} is not a valid token.",
            lines=state.lines,
            raw_line=ct_begin[0],
            raw_column=ct_begin[1],
        )
    if ct_kind is _ERROR_DEDENT:
        raise ParserSyntaxError(
            "Inconsistent indentation. Expected a dedent.",
            lines=state.lines,
            raw_line=ct_begin[0],
            raw_column=ct_begin[1],
        )

    # For an indent token, the relative indent is the part of the newest
    # indent that extends past the enclosing one.
    relative_indent: Optional[str] = None
    if ct_kind is _INDENT:
        enclosing = state.indents[-2] if len(state.indents) >= 2 else ""
        relative_indent = state.indents[-1][len(enclosing):]

    # Adjust the indent stack for the upcoming token before building the
    # whitespace state for this one.
    if next_token is not None:
        upcoming_kind = next_token.type
        if upcoming_kind is _INDENT:
            upcoming_line, upcoming_column = next_token.start_pos
            state.indents.append(
                state.lines[upcoming_line - 1][:upcoming_column])
        elif upcoming_kind is _DEDENT:
            state.indents.pop()

    whitespace_before = state.previous_whitespace_state

    is_dummy = (ct_kind is _INDENT or ct_kind is _DEDENT
                or ct_kind is _ENDMARKER)
    if is_dummy:
        # Dummy tokens leave the whitespace state alone. That lets an
        # IndentedBlock footer consume part of the whitespace and the next
        # statement's leading_lines consume the rest. The indentation then has
        # to be wrong for one of the two; it is allowed to be wrong for the
        # footer, which overrides the state manually when it parses.
        whitespace_after = whitespace_before
        ct_end = ct_begin
    else:
        # Real token: compute the end position ourselves, since parso's
        # end_pos is wrong for triple-quoted strings.
        pieces = split_lines(ct_string)
        if len(pieces) > 1:
            ct_end = (ct_begin[0] + len(pieces) - 1, len(pieces[-1]))
        else:
            ct_end = (ct_begin[0], ct_begin[1] + len(ct_string))

        # The whitespace parser behaves differently inside parentheses (e.g.
        # newlines are allowed), and f-strings switch that off again. Parso's
        # tokenizer tracks this internally without exposing it, so the same
        # bookkeeping is repeated here.
        stack = state.parenthesis_or_fstring_stack
        try:
            if ct_kind is _FSTRING_START:
                stack.append(_FSTRING_STACK_ENTRY)
            elif ct_kind is _FSTRING_END:
                stack.pop()
            elif ct_kind is _OP and ct_string in "([{":
                stack.append(_PARENTHESIS_STACK_ENTRY)
            elif ct_kind is _OP and ct_string in ")]}":
                stack.pop()
        except IndexError:
            # Mismatched braces can leave the stack empty by the time a
            # closing token tries to pop it.
            raise ParserSyntaxError(
                "Encountered a closing brace without a matching opening brace.",
                lines=state.lines,
                raw_line=ct_begin[0],
                raw_column=ct_begin[1],
            )

        # Parenthesized only when the innermost open entry is a parenthesis.
        is_parenthesized = (bool(stack)
                            and stack[-1] == _PARENTHESIS_STACK_ENTRY)
        whitespace_after = WhitespaceState(ct_end[0], ct_end[1],
                                           state.indents[-1], is_parenthesized)

    # The next token picks this up as its whitespace_before.
    state.previous_whitespace_state = whitespace_after

    return Token(
        ct_kind,
        ct_string,
        ct_begin,
        ct_end,
        whitespace_before,
        whitespace_after,
        relative_indent,
    )