def _parse_newline(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Newline]:
    """
    Speculatively parses a newline at the current position of ``state``.

    ``NEWLINE_RE`` is matched against the current line (``config.lines`` is indexed
    with ``state.line - 1``) starting at ``state.column``.

    Returns ``None`` without touching ``state`` when nothing matches. Otherwise
    ``state.column`` is advanced past the match, ``state`` is moved to the start of
    the following line if there is one, and a ``Newline`` node is returned.

    Raises ``Exception`` when the match does not reach the end of the current line;
    ``state.column`` has already been advanced when that happens.
    """
    current_line = config.lines[state.line - 1]
    match = NEWLINE_RE.match(current_line, state.column)
    if match is None:
        # Nothing newline-like at this position: the speculative parse failed.
        return None

    matched_text = match.group(0)
    state.column += len(matched_text)
    if state.column != len(current_line):
        raise Exception("Internal Error: Found a newline, but it wasn't the EOL.")

    if state.line < len(config.lines):
        # The newline closed this line and another line follows, so continue from
        # the beginning of that next line.
        state.line += 1
        state.column = 0

    # When the text equals the configured default, leave the value unset so it is
    # inherited from the Module instead of being pinned explicitly.
    return (
        Newline()
        if matched_text == config.default_newline
        else Newline(matched_text)
    )
def detect_config(
    source: Union[str, bytes],
    *,
    partial: PartialParserConfig,
    detect_trailing_newline: bool,
    detect_default_newline: bool,
) -> ConfigDetectionResult:
    """
    Builds a full ParserConfig (plus the token stream) for ``source``, filling in
    every setting that ``partial`` leaves as ``AutoConfig``.

    - ``source``: the code to parse; bytes are decoded with the resolved encoding.
    - ``partial``: the caller-supplied settings; non-``AutoConfig`` values are used
      as-is.
    - ``detect_trailing_newline``: when true, record whether ``source`` ends with a
      newline and append the default newline if it does not.
    - ``detect_default_newline``: when true (and the newline is ``AutoConfig``),
      detect the default newline from the source instead of using
      ``_FALLBACK_DEFAULT_NEWLINE``.

    Returns a ``ConfigDetectionResult`` holding the config and the tokens.
    """
    python_version = partial.parsed_python_version

    # Encoding: an explicit setting wins, otherwise detect it from the source.
    configured_encoding = partial.encoding
    if isinstance(configured_encoding, AutoConfig):
        encoding = _detect_encoding(source)
    else:
        encoding = configured_encoding

    if isinstance(source, str):
        source_str = source
    else:
        source_str = source.decode(encoding)

    # Default newline: an explicit setting wins; otherwise detect it only when asked
    # to, falling back to the module-level constant.
    configured_newline = partial.default_newline
    if not isinstance(configured_newline, AutoConfig):
        default_newline = configured_newline
    elif detect_default_newline:
        default_newline = _detect_default_newline(source_str)
    else:
        default_newline = _FALLBACK_DEFAULT_NEWLINE

    # HACK: The grammar wants the source to end with a newline even though python
    # itself does not. Append one here to keep the parser happy; it is meant to be
    # stripped again during cst.Module's codegen.
    #
    # parso appears to lean on error recovery for this case, which is not available
    # here, and lib2to3 does not seem to handle it at all.
    has_trailing_newline = detect_trailing_newline and (
        bool(source_str) and NEWLINE_RE.match(source_str[-1]) is not None
    )
    if detect_trailing_newline and not has_trailing_newline:
        source_str += default_newline

    lines = split_lines(source_str, keepends=True)
    tokens = tokenize_lines(lines, python_version)

    configured_indent = partial.default_indent
    if isinstance(configured_indent, AutoConfig):
        # `_detect_indent` consumes part of the iterator it is given, so hand it a
        # duplicate from `itertools.tee` and keep an unconsumed stream for the
        # caller. The duplicate is only referenced inside this function.
        tokens, tokens_dup = itertools.tee(tokens)
        default_indent = _detect_indent(tokens_dup)
    else:
        default_indent = configured_indent

    config = ParserConfig(
        lines=lines,
        encoding=encoding,
        default_indent=default_indent,
        default_newline=default_newline,
        has_trailing_newline=has_trailing_newline,
        version=python_version,
    )
    return ConfigDetectionResult(config=config, tokens=tokens)