Beispiel #1
0
def convert_file_input(config: ParserConfig, children: Sequence[Any]) -> Any:
    """
    Assemble the ``Module`` node from the children of a file input.

    The last child is taken as the footer and every child before it as the body.
    The header is derived from the leading lines of the first statement or, when
    there are no statements at all, from what would otherwise be the footer.
    Encoding, indent and newline settings are copied over from ``config``.
    """
    *statements, trailing = children
    if statements:
        # The leading lines of the first statement are moved into the module header,
        # leaving that statement without any leading lines of its own.
        leading_stmt = statements[0]
        header = leading_stmt.leading_lines
        statements[0] = leading_stmt.with_changes(leading_lines=())
        footer = trailing
    else:
        # Without a body, header and footer can't be told apart. The header wins and
        # takes ownership of the EmptyLine nodes; the footer is left empty.
        #
        # A completely empty file (not even a comment) is special-cased to an empty
        # header rather than the single dummy EmptyLine we would normally parse.
        is_empty_file = (
            len(config.lines) == 2
            and NEWLINE_RE.fullmatch(config.lines[0])
            and config.lines[1] == ""
        )
        header = () if is_empty_file else trailing
        footer = ()
    return Module(
        header=header,
        body=statements,
        footer=footer,
        encoding=config.encoding,
        default_indent=config.default_indent,
        default_newline=config.default_newline,
        has_trailing_newline=config.has_trailing_newline,
    )
Beispiel #2
0
def _detect_trailing_newline(source_str: str) -> bool:
    """
    Report whether ``source_str`` ends with a real trailing newline.

    Returns ``False`` for an empty string, for a string whose last character is not
    a newline, and for a string whose final newline follows a continuation.
    """
    if not source_str:
        return False
    if NEWLINE_RE.fullmatch(source_str[-1]) is None:
        return False
    # The final newline doesn't count if it belongs to a continuation. Check both
    # the two- and three-character tails, in that order.
    for tail_length in (2, 3):
        if _CONTINUATION_RE.fullmatch(source_str[-tail_length:]):
            return False
    return True
Beispiel #3
0
def _detect_default_newline(source_str: str) -> str:
    """
    Return the first newline found in ``source_str`` for use as the default newline.

    If the source contains no newline at all, ``_FALLBACK_DEFAULT_NEWLINE`` is
    returned instead.
    """
    # NOTE(review): an earlier comment here advised against using `NEWLINE_RE`
    # because it "might match multiple newlines as a single newline", yet the code
    # searches with `NEWLINE_RE`. Confirm against the pattern's definition that it
    # only ever matches one newline sequence.
    first_newline = NEWLINE_RE.search(source_str)
    if first_newline is None:
        return _FALLBACK_DEFAULT_NEWLINE
    return first_newline.group(0)
Beispiel #4
0
    def __post_init__(self) -> None:
        """
        Validate the configured values and store the parsed Python version.

        Resolves ``python_version`` (picking a compatible version when it is left as
        ``AutoConfig``), checks it against ``KNOWN_PYTHON_VERSION_STRINGS``, and
        stores the result as ``parsed_python_version``. Then validates ``encoding``,
        ``default_newline`` and ``default_indent`` whenever they are not
        ``AutoConfig``.

        Raises:
            ValueError: if the Python version is not one of the known versions, if
                the encoding can't be found by ``codecs.lookup``, or if the default
                newline or default indent doesn't fully match its pattern.
        """
        raw_python_version = self.python_version

        if isinstance(raw_python_version, AutoConfig):
            # If unspecified, we'll try to pick the same as the running
            # interpreter.  There will always be at least one entry.
            parsed_python_version = _pick_compatible_python_version()
        else:
            # If the caller specified a version, we require that to be a known
            # version (because we don't want to encourage doing duplicate work
            # when there weren't syntax changes).

            # `parse_version_string` will raise a ValueError if the version is
            # invalid.
            parsed_python_version = parse_version_string(raw_python_version)

        if not any(
            parsed_python_version == parse_version_string(v)
            for v in KNOWN_PYTHON_VERSION_STRINGS
        ):
            comma_versions = ", ".join(KNOWN_PYTHON_VERSION_STRINGS)
            raise ValueError(
                "LibCST can only parse code using one of the following versions of "
                + f"Python's grammar: {comma_versions}. More versions may be "
                + "supported by future releases."
            )

        # We use object.__setattr__ because the dataclass is frozen. See:
        # https://docs.python.org/3/library/dataclasses.html#frozen-instances
        # This should be safe behavior inside of `__post_init__`.
        object.__setattr__(self, "parsed_python_version", parsed_python_version)

        encoding = self.encoding
        if not isinstance(encoding, AutoConfig):
            try:
                codecs.lookup(encoding)
            except LookupError as err:
                # Chain explicitly so the traceback shows the codec lookup failure
                # as the direct cause of the ValueError.
                raise ValueError(
                    f"{repr(encoding)} is not a supported encoding"
                ) from err

        newline = self.default_newline
        if (
            not isinstance(newline, AutoConfig)
            and NEWLINE_RE.fullmatch(newline) is None
        ):
            raise ValueError(
                f"Got an invalid value for default_newline: {repr(newline)}"
            )

        indent = self.default_indent
        if not isinstance(indent, AutoConfig) and _INDENT_RE.fullmatch(indent) is None:
            raise ValueError(f"Got an invalid value for default_indent: {repr(indent)}")
Beispiel #5
0
    def __post_init__(self) -> None:
        """
        Validate the configured values and store the parsed Python version.

        Parses ``python_version`` (passing ``None`` to ``parse_version_string`` when
        it is left as ``AutoConfig``), requires the result to be one of 3.5, 3.6,
        3.7 or 3.8, and stores it as ``parsed_python_version``. Then validates
        ``encoding``, ``default_newline`` and ``default_indent`` whenever they are
        not ``AutoConfig``.

        Raises:
            ValueError: if the Python version is invalid or unsupported, if the
                encoding can't be found by ``codecs.lookup``, or if the default
                newline or default indent doesn't fully match its pattern.
        """
        raw_python_version = self.python_version
        # `parse_version_string` will raise a ValueError if the version is invalid.
        parsed_python_version = parse_version_string(
            None if isinstance(raw_python_version, AutoConfig)
            else raw_python_version
        )

        # Once we add support for more versions of Python, we can change this to detect
        # the supported version range.
        if parsed_python_version not in (
                PythonVersionInfo(3, 5),
                PythonVersionInfo(3, 6),
                PythonVersionInfo(3, 7),
                PythonVersionInfo(3, 8),
        ):
            raise ValueError(
                "LibCST can only parse code using one of the following versions of "
                + "Python's grammar: 3.5, 3.6, 3.7, 3.8. More versions may be "
                + "supported by future releases.")

        # We use object.__setattr__ because the dataclass is frozen. See:
        # https://docs.python.org/3/library/dataclasses.html#frozen-instances
        # This should be safe behavior inside of `__post_init__`.
        object.__setattr__(self, "parsed_python_version",
                           parsed_python_version)

        encoding = self.encoding
        if not isinstance(encoding, AutoConfig):
            try:
                codecs.lookup(encoding)
            except LookupError as err:
                # Chain explicitly so the traceback shows the codec lookup failure
                # as the direct cause of the ValueError.
                raise ValueError(
                    f"{repr(encoding)} is not a supported encoding") from err

        newline = self.default_newline
        if (not isinstance(newline, AutoConfig)
                and NEWLINE_RE.fullmatch(newline) is None):
            raise ValueError(
                f"Got an invalid value for default_newline: {repr(newline)}")

        indent = self.default_indent
        if not isinstance(indent,
                          AutoConfig) and _INDENT_RE.fullmatch(indent) is None:
            raise ValueError(
                f"Got an invalid value for default_indent: {repr(indent)}")
Beispiel #6
0
def _parse_newline(config: BaseWhitespaceParserConfig,
                   state: State) -> Optional[Newline]:
    """
    Speculatively parse a newline at the current position in ``state``.

    Returns ``None``, leaving ``state`` untouched, when no newline starts at the
    current column. Otherwise the column is advanced past the newline and, if
    another line follows, ``state`` is moved to the start of that line.

    Raises:
        Exception: if the matched newline doesn't end the current line.
    """
    current_line = config.lines[state.line - 1]
    found = NEWLINE_RE.match(current_line, state.column)
    if found is None:
        # Speculative parsing failed: there is no newline here.
        return None

    newline_text = found.group(0)
    state.column += len(newline_text)
    if state.column != len(current_line):
        raise Exception(
            "Internal Error: Found a newline, but it wasn't the EOL.")

    has_following_line = state.line < len(config.lines)
    if has_following_line:
        # The newline closed this line and another one follows, so continue from
        # the beginning of the next line.
        state.line += 1
        state.column = 0

    # Only store the value explicitly when it differs from the default newline;
    # otherwise it is simply inherited from the Module.
    return (Newline() if newline_text == config.default_newline
            else Newline(newline_text))
Beispiel #7
0
def detect_config(
    source: Union[str, bytes],
    *,
    partial: PartialParserConfig,
    detect_trailing_newline: bool,
    detect_default_newline: bool,
) -> ConfigDetectionResult:
    """
    Computes a ParserConfig given the current source code to be parsed and a partial
    config.

    Args:
        source: The code to parse. ``bytes`` input is decoded using the configured
            or detected encoding.
        partial: Caller-provided settings. Any value left as ``AutoConfig`` is
            detected from ``source`` instead.
        detect_trailing_newline: When true, check whether the source ends with a
            trailing newline and append the default newline if it doesn't. When
            false, ``has_trailing_newline`` is reported as ``False`` and the source
            is left as-is.
        detect_default_newline: When true (and ``partial.default_newline`` is
            ``AutoConfig``), use the first newline in the source as the default
            newline; otherwise fall back to ``_FALLBACK_DEFAULT_NEWLINE``.

    Returns:
        A ``ConfigDetectionResult`` holding the computed ``ParserConfig`` and the
        token stream for the (possibly newline-extended) source.
    """

    python_version = partial.parsed_python_version

    partial_encoding = partial.encoding
    encoding = (_detect_encoding(source) if isinstance(
        partial_encoding, AutoConfig) else partial_encoding)

    source_str = source if isinstance(source, str) else source.decode(encoding)

    partial_default_newline = partial.default_newline
    if not isinstance(partial_default_newline, AutoConfig):
        default_newline = partial_default_newline
    elif detect_default_newline:
        default_newline = _detect_default_newline(source_str)
    else:
        default_newline = _FALLBACK_DEFAULT_NEWLINE

    # HACK: The grammar requires a trailing newline, but python doesn't actually require
    # a trailing newline. Add one onto the end to make the parser happy. We'll strip it
    # out again during cst.Module's codegen.
    #
    # I think parso relies on error recovery support to handle this, which we don't
    # have. lib2to3 doesn't handle this case at all AFAICT.
    #
    # Delegate to `_detect_trailing_newline` rather than only inspecting the last
    # character: a final newline that follows a backslash continuation must not be
    # counted as a trailing newline, so we still need to append one in that case.
    has_trailing_newline = detect_trailing_newline and _detect_trailing_newline(
        source_str)
    if detect_trailing_newline and not has_trailing_newline:
        source_str += default_newline

    lines = split_lines(source_str, keepends=True)

    tokens = tokenize_lines(lines, python_version)

    partial_default_indent = partial.default_indent
    if isinstance(partial_default_indent, AutoConfig):
        # We need to clone `tokens` before passing it to `_detect_indent`, because
        # `_detect_indent` consumes some tokens, mutating `tokens`.
        #
        # Implementation detail: CPython's `itertools.tee` uses weakrefs to reduce the
        # size of its FIFO, so this doesn't retain items (leak memory) for `tokens_dup`
        # once `token_dup` is freed at the end of this method (subject to
        # GC/refcounting).
        tokens, tokens_dup = itertools.tee(tokens)
        default_indent = _detect_indent(tokens_dup)
    else:
        default_indent = partial_default_indent

    return ConfigDetectionResult(
        config=ParserConfig(
            lines=lines,
            encoding=encoding,
            default_indent=default_indent,
            default_newline=default_newline,
            has_trailing_newline=has_trailing_newline,
            version=python_version,
        ),
        tokens=tokens,
    )