def convert_file_input(config: ParserConfig, children: Sequence[Any]) -> Any:
    """
    Assemble the ``Module`` node from the children parsed for a whole file.

    The last child is taken as the footer and everything before it as the body
    statements. The module header is taken from the leading lines of the first
    statement, or from the footer when there are no statements at all.
    """
    *statements, trailer = children

    if statements:
        # The leading lines of the first statement really belong to the module, so
        # move them into the header and strip them from the statement itself.
        leading_stmt = statements[0]
        header = leading_stmt.leading_lines
        statements[0] = leading_stmt.with_changes(leading_lines=())
        footer = trailer
    else:
        # With no statements the header/footer split is ambiguous. The header wins
        # and takes ownership of whatever the footer would have held.
        footer = ()
        # A file consisting of nothing but a single newline (not even a comment)
        # gets an empty header rather than the single dummy EmptyLine that would
        # otherwise be parsed.
        file_is_blank = (
            len(config.lines) == 2
            and NEWLINE_RE.fullmatch(config.lines[0])
            and config.lines[1] == ""
        )
        header = () if file_is_blank else trailer

    return Module(
        header=header,
        body=statements,
        footer=footer,
        encoding=config.encoding,
        default_indent=config.default_indent,
        default_newline=config.default_newline,
        has_trailing_newline=config.has_trailing_newline,
    )
def _detect_trailing_newline(source_str: str) -> bool:
    """
    Report whether ``source_str`` ends with a real trailing newline.

    Returns ``False`` for an empty string, for a string whose last character is
    not a newline, and for a string whose final newline is part of a
    continuation (as matched by ``_CONTINUATION_RE``).
    """
    if not source_str:
        return False
    if NEWLINE_RE.fullmatch(source_str[-1]) is None:
        return False
    # The final newline does not count if it is part of a continuation. Both the
    # two- and three-character tails are checked against the continuation pattern.
    return not any(
        _CONTINUATION_RE.fullmatch(source_str[-tail_len:]) for tail_len in (2, 3)
    )
def _detect_default_newline(source_str: str) -> str:
    """
    Return the first newline sequence found in ``source_str``.

    Falls back to ``_FALLBACK_DEFAULT_NEWLINE`` when the source contains no
    newline at all.
    """
    # NOTE(review): an earlier comment here warned against using `NEWLINE_RE`
    # because it might match several newlines as one, yet the code does use it.
    # Confirm that `NEWLINE_RE` matches exactly one newline sequence.
    first_newline = NEWLINE_RE.search(source_str)
    if first_newline is None:
        return _FALLBACK_DEFAULT_NEWLINE
    return first_newline.group(0)
def __post_init__(self) -> None:
    """
    Validate the configured fields and store the parsed Python version.

    Sets ``parsed_python_version`` on the instance. Fields left as
    ``AutoConfig`` are not validated here.

    Raises:
        ValueError: if ``python_version`` is not one of
            ``KNOWN_PYTHON_VERSION_STRINGS`` (or cannot be parsed), if
            ``encoding`` is not a codec known to ``codecs.lookup``, or if
            ``default_newline`` / ``default_indent`` do not match their
            respective patterns.
    """
    raw_python_version = self.python_version
    if isinstance(raw_python_version, AutoConfig):
        # If unspecified, we'll try to pick the same as the running
        # interpreter. There will always be at least one entry.
        parsed_python_version = _pick_compatible_python_version()
    else:
        # If the caller specified a version, we require that to be a known
        # version (because we don't want to encourage doing duplicate work
        # when there weren't syntax changes).

        # `parse_version_string` will raise a ValueError if the version is
        # invalid.
        parsed_python_version = parse_version_string(raw_python_version)
        if not any(
            parsed_python_version == parse_version_string(v)
            for v in KNOWN_PYTHON_VERSION_STRINGS
        ):
            comma_versions = ", ".join(KNOWN_PYTHON_VERSION_STRINGS)
            raise ValueError(
                "LibCST can only parse code using one of the following versions of "
                + f"Python's grammar: {comma_versions}. More versions may be "
                + "supported by future releases."
            )

    # We use object.__setattr__ because the dataclass is frozen. See:
    # https://docs.python.org/3/library/dataclasses.html#frozen-instances
    # This should be safe behavior inside of `__post_init__`.
    object.__setattr__(self, "parsed_python_version", parsed_python_version)

    encoding = self.encoding
    if not isinstance(encoding, AutoConfig):
        try:
            codecs.lookup(encoding)
        except LookupError as err:
            # Chain explicitly so the traceback shows the codec lookup failure as
            # the direct cause of the ValueError.
            raise ValueError(f"{repr(encoding)} is not a supported encoding") from err

    newline = self.default_newline
    if not isinstance(newline, AutoConfig) and NEWLINE_RE.fullmatch(newline) is None:
        raise ValueError(f"Got an invalid value for default_newline: {repr(newline)}")

    indent = self.default_indent
    if not isinstance(indent, AutoConfig) and _INDENT_RE.fullmatch(indent) is None:
        raise ValueError(f"Got an invalid value for default_indent: {repr(indent)}")
def __post_init__(self) -> None:
    """
    Validate the configured fields and store the parsed Python version.

    Sets ``parsed_python_version`` on the instance. ``encoding``,
    ``default_newline`` and ``default_indent`` are not validated when left as
    ``AutoConfig``.

    Raises:
        ValueError: if the Python version is not 3.5, 3.6, 3.7 or 3.8 (or
            cannot be parsed), if ``encoding`` is not a codec known to
            ``codecs.lookup``, or if ``default_newline`` / ``default_indent``
            do not match their respective patterns.
    """
    raw_python_version = self.python_version
    # `parse_version_string` will raise a ValueError if the version is invalid.
    # An `AutoConfig` version is passed on as `None`.
    parsed_python_version = parse_version_string(
        None if isinstance(raw_python_version, AutoConfig) else raw_python_version
    )

    # Once we add support for more versions of Python, we can change this to detect
    # the supported version range.
    if parsed_python_version not in (
        PythonVersionInfo(3, 5),
        PythonVersionInfo(3, 6),
        PythonVersionInfo(3, 7),
        PythonVersionInfo(3, 8),
    ):
        raise ValueError(
            "LibCST can only parse code using one of the following versions of "
            + "Python's grammar: 3.5, 3.6, 3.7, 3.8. More versions may be "
            + "supported by future releases."
        )

    # We use object.__setattr__ because the dataclass is frozen. See:
    # https://docs.python.org/3/library/dataclasses.html#frozen-instances
    # This should be safe behavior inside of `__post_init__`.
    object.__setattr__(self, "parsed_python_version", parsed_python_version)

    encoding = self.encoding
    if not isinstance(encoding, AutoConfig):
        try:
            codecs.lookup(encoding)
        except LookupError as err:
            # Chain explicitly so the traceback shows the codec lookup failure as
            # the direct cause of the ValueError.
            raise ValueError(f"{repr(encoding)} is not a supported encoding") from err

    newline = self.default_newline
    if not isinstance(newline, AutoConfig) and NEWLINE_RE.fullmatch(newline) is None:
        raise ValueError(f"Got an invalid value for default_newline: {repr(newline)}")

    indent = self.default_indent
    if not isinstance(indent, AutoConfig) and _INDENT_RE.fullmatch(indent) is None:
        raise ValueError(f"Got an invalid value for default_indent: {repr(indent)}")
def _parse_newline(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Newline]:
    """
    Speculatively parse a newline at the position described by ``state``.

    On success ``state`` is advanced past the newline (onto the start of the
    next line when there is one) and a ``Newline`` node is returned. When no
    newline starts at the current column, ``state`` is left untouched and
    ``None`` is returned.

    Raises:
        Exception: if a newline matched but did not end the current line.
    """
    current_line = config.lines[state.line - 1]
    found = NEWLINE_RE.match(current_line, state.column)
    if found is None:
        # Speculative parsing failed: nothing was consumed.
        return None

    newline_text = found.group(0)
    state.column += len(newline_text)
    if state.column != len(current_line):
        raise Exception("Internal Error: Found a newline, but it wasn't the EOL.")

    if state.line < len(config.lines):
        # The newline ended this line and another line follows, so continue from
        # the beginning of that next line.
        state.line += 1
        state.column = 0

    if newline_text == config.default_newline:
        # Matches the default newline, so leave the value unset and let it be
        # inherited rather than stored explicitly.
        return Newline()
    return Newline(newline_text)
def detect_config(
    source: Union[str, bytes],
    *,
    partial: PartialParserConfig,
    detect_trailing_newline: bool,
    detect_default_newline: bool,
) -> ConfigDetectionResult:
    """
    Computes a ParserConfig given the current source code to be parsed and a partial
    config.

    Values set explicitly on ``partial`` are used as-is; values left as
    ``AutoConfig`` are detected from ``source``. ``bytes`` input is decoded with
    the resolved encoding.

    When ``detect_trailing_newline`` is true and the source has no trailing
    newline according to ``_detect_trailing_newline`` (which also rejects a final
    newline that follows a continuation), the default newline is appended before
    the source is split into lines and tokenized.

    Returns a ``ConfigDetectionResult`` holding the computed ``ParserConfig`` and
    the token iterator for the source.
    """
    python_version = partial.parsed_python_version

    partial_encoding = partial.encoding
    encoding = (
        _detect_encoding(source)
        if isinstance(partial_encoding, AutoConfig)
        else partial_encoding
    )

    source_str = source if isinstance(source, str) else source.decode(encoding)

    partial_default_newline = partial.default_newline
    if not isinstance(partial_default_newline, AutoConfig):
        default_newline = partial_default_newline
    elif detect_default_newline:
        default_newline = _detect_default_newline(source_str)
    else:
        default_newline = _FALLBACK_DEFAULT_NEWLINE

    # HACK: The grammar requires a trailing newline, but python doesn't actually require
    # a trailing newline. Add one onto the end to make the parser happy. We'll strip it
    # out again during cst.Module's codegen.
    #
    # I think parso relies on error recovery support to handle this, which we don't
    # have. lib2to3 doesn't handle this case at all AFAICT.
    #
    # Use the shared helper rather than only inspecting the last character, so that a
    # final newline following a continuation is not mistaken for a trailing newline.
    has_trailing_newline = detect_trailing_newline and _detect_trailing_newline(
        source_str
    )
    if detect_trailing_newline and not has_trailing_newline:
        source_str += default_newline

    lines = split_lines(source_str, keepends=True)

    tokens = tokenize_lines(lines, python_version)

    partial_default_indent = partial.default_indent
    if isinstance(partial_default_indent, AutoConfig):
        # We need to clone `tokens` before passing it to `_detect_indent`, because
        # `_detect_indent` consumes some tokens, mutating `tokens`.
        #
        # Implementation detail: CPython's `itertools.tee` uses weakrefs to reduce the
        # size of its FIFO, so this doesn't retain items (leak memory) for `tokens_dup`
        # once `tokens_dup` is freed at the end of this method (subject to
        # GC/refcounting).
        tokens, tokens_dup = itertools.tee(tokens)
        default_indent = _detect_indent(tokens_dup)
    else:
        default_indent = partial_default_indent

    return ConfigDetectionResult(
        config=ParserConfig(
            lines=lines,
            encoding=encoding,
            default_indent=default_indent,
            default_newline=default_newline,
            has_trailing_newline=has_trailing_newline,
            version=python_version,
        ),
        tokens=tokens,
    )