コード例 #1
0
def tokenize(
    code: str,
    version_info: PythonVersionInfo,
    start_pos: Tuple[int, int] = (1, 0)
) -> Generator[PythonToken, None, None]:
    """Generate tokens from the given source code string.

    The source is first split into lines (newlines preserved) and then
    handed to the line-based tokenizer, starting at ``start_pos``.
    """
    source_lines = split_lines(code, keepends=True)
    return tokenize_lines(source_lines, version_info, start_pos=start_pos)
コード例 #2
0
def tokenize(code: str, version_info: PythonVersionInfo) -> Iterator[Token]:
    """Tokenize ``code``, preferring the native tokenizer when available.

    Falls back to the pure-Python line tokenizer when the optional
    ``libcst_native`` extension is not installed.
    """
    try:
        from libcst_native import tokenize as native_tokenize

        return native_tokenize.tokenize(code)
    except ImportError:
        # Pure-Python fallback: split into lines (keeping newline characters)
        # and tokenize those.
        lines = split_lines(code, keepends=True)
        # NOTE(review): other tokenize wrappers in related code pass only
        # (lines, version_info); presumably this project's tokenize_lines
        # also takes the raw code string first — confirm against its
        # signature.
        return tokenize_lines(code, lines, version_info)
コード例 #3
0
 def end_pos(self):
     """Return the (line, column) position just past the end of this string.

     A single-line string ends on the start line, offset by its length;
     a multi-line string ends at column 0 of its final line.
     """
     parts = split_lines(self.string)
     if len(parts) == 1:
         return self.start_pos[0], self.start_pos[1] + len(self.string)
     return self.start_pos[0] + len(parts) - 1, 0
コード例 #4
0
 def check(code):
     """Assert that the last token's end position matches the source extent."""
     token_list = _get_token_list(code)
     source_lines = split_lines(code)
     expected_end = (len(source_lines), len(source_lines[-1]))
     assert token_list[-1].end_pos == expected_end
コード例 #5
0
 def test_split_lines(self, string, expected_result, keepends):
     # Parametrized: each case supplies an input string, the expected list of
     # lines, and whether newline characters should be kept on each line.
     assert split_lines(string, keepends=keepends) == expected_result
コード例 #6
0
ファイル: detect_config.py プロジェクト: pradeep90/LibCST
def detect_config(
    source: Union[str, bytes],
    *,
    partial: PartialParserConfig,
    detect_trailing_newline: bool,
    detect_default_newline: bool,
) -> ConfigDetectionResult:
    """
    Computes a ParserConfig given the current source code to be parsed and a partial
    config.

    Fields left as AutoConfig in ``partial`` (encoding, default newline,
    default indent, future imports) are detected from ``source``; explicitly
    configured fields are passed through unchanged. Returns the resolved
    config together with the token stream for the (possibly newline-padded)
    source.
    """

    python_version = partial.parsed_python_version

    # Encoding must be resolved first so that a bytes source can be decoded.
    partial_encoding = partial.encoding
    encoding = (
        _detect_encoding(source)
        if isinstance(partial_encoding, AutoConfig)
        else partial_encoding
    )

    source_str = source if isinstance(source, str) else source.decode(encoding)

    partial_default_newline = partial.default_newline
    default_newline = (
        (
            _detect_default_newline(source_str)
            if detect_default_newline
            else _FALLBACK_DEFAULT_NEWLINE
        )
        if isinstance(partial_default_newline, AutoConfig)
        else partial_default_newline
    )

    # HACK: The grammar requires a trailing newline, but python doesn't actually require
    # a trailing newline. Add one onto the end to make the parser happy. We'll strip it
    # out again during cst.Module's codegen.
    #
    # I think parso relies on error recovery support to handle this, which we don't
    # have. lib2to3 doesn't handle this case at all AFAICT.
    has_trailing_newline = detect_trailing_newline and _detect_trailing_newline(
        source_str
    )
    if detect_trailing_newline and not has_trailing_newline:
        source_str += default_newline

    # Split AFTER the newline padding above so the padded line is included.
    lines = split_lines(source_str, keepends=True)

    tokens = tokenize_lines(lines, python_version)

    partial_default_indent = partial.default_indent
    if isinstance(partial_default_indent, AutoConfig):
        # We need to clone `tokens` before passing it to `_detect_indent`, because
        # `_detect_indent` consumes some tokens, mutating `tokens`.
        #
        # Implementation detail: CPython's `itertools.tee` uses weakrefs to reduce the
        # size of its FIFO, so this doesn't retain items (leak memory) for `tokens_dup`
        # once `tokens_dup` is freed at the end of this method (subject to
        # GC/refcounting).
        tokens, tokens_dup = itertools.tee(tokens)
        default_indent = _detect_indent(tokens_dup)
    else:
        default_indent = partial_default_indent

    partial_future_imports = partial.future_imports
    if isinstance(partial_future_imports, AutoConfig):
        # Same note as above re itertools.tee, we will consume tokens.
        tokens, tokens_dup = itertools.tee(tokens)
        future_imports = _detect_future_imports(tokens_dup)
    else:
        future_imports = partial_future_imports

    return ConfigDetectionResult(
        config=ParserConfig(
            lines=lines,
            encoding=encoding,
            default_indent=default_indent,
            default_newline=default_newline,
            has_trailing_newline=has_trailing_newline,
            version=python_version,
            future_imports=future_imports,
        ),
        tokens=tokens,
    )
コード例 #7
0
def _convert_token(  # noqa: C901: too complex
        state: _TokenizeState, curr_token: OrigToken,
        next_token: Optional[OrigToken]) -> Token:
    """Convert one tokenizer token into this project's ``Token``.

    Threads whitespace/indent bookkeeping through ``state`` (indent stack,
    parenthesis/f-string stack, previous whitespace state) and peeks at
    ``next_token`` to maintain the indent stack one step ahead.

    Raises ``ParserSyntaxError`` for error tokens, inconsistent dedents, and
    unmatched closing braces.
    """
    ct_type = curr_token.type
    ct_string = curr_token.string
    ct_start_pos = curr_token.start_pos
    if ct_type is _ERRORTOKEN:
        raise ParserSyntaxError(
            f"{ct_string!r} is not a valid token.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )
    if ct_type is _ERROR_DEDENT:
        raise ParserSyntaxError(
            "Inconsistent indentation. Expected a dedent.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )

    # Compute relative indent changes for indent/dedent nodes
    relative_indent: Optional[str] = None
    if ct_type is _INDENT:
        old_indent = "" if len(state.indents) < 2 else state.indents[-2]
        new_indent = state.indents[-1]
        relative_indent = new_indent[len(old_indent):]

    # Maintain the indent stack one token ahead: push the exact indent text of
    # an upcoming INDENT (sliced out of the source line), pop on DEDENT.
    if next_token is not None:
        nt_type = next_token.type
        if nt_type is _INDENT:
            nt_line, nt_column = next_token.start_pos
            state.indents.append(state.lines[nt_line - 1][:nt_column])
        elif nt_type is _DEDENT:
            state.indents.pop()

    whitespace_before = state.previous_whitespace_state

    if ct_type is _INDENT or ct_type is _DEDENT or ct_type is _ENDMARKER:
        # Don't update whitespace state for these dummy tokens. This makes it possible
        # to partially parse whitespace for IndentedBlock footers, and then parse the
        # rest of the whitespace in the following statement's leading_lines.
        # Unfortunately, that means that the indentation is either wrong for the footer
        # comments, or for the next line. We've chosen to allow it to be wrong for the
        # IndentedBlock footer and manually override the state when parsing whitespace
        # in that particular node.
        whitespace_after = whitespace_before
        ct_end_pos = ct_start_pos
    else:
        # Not a dummy token, so update the whitespace state.

        # Compute our own end_pos, since parso's end_pos is wrong for triple-strings.
        lines = split_lines(ct_string)
        if len(lines) > 1:
            ct_end_pos = ct_start_pos[0] + len(lines) - 1, len(lines[-1])
        else:
            ct_end_pos = (ct_start_pos[0], ct_start_pos[1] + len(ct_string))

        # Figure out what mode the whitespace parser should use. If we're inside
        # parentheses, certain whitespace (e.g. newlines) are allowed where they would
        # otherwise not be. f-strings override and disable this behavior, however.
        #
        # Parso's tokenizer tracks this internally, but doesn't expose it, so we have to
        # duplicate that logic here.

        pof_stack = state.parenthesis_or_fstring_stack
        try:
            if ct_type is _FSTRING_START:
                pof_stack.append(_FSTRING_STACK_ENTRY)
            elif ct_type is _FSTRING_END:
                pof_stack.pop()
            elif ct_type is _OP:
                if ct_string in "([{":
                    pof_stack.append(_PARENTHESIS_STACK_ENTRY)
                elif ct_string in ")]}":
                    pof_stack.pop()
        except IndexError:
            # pof_stack may be empty by the time we need to read from it due to
            # mismatched braces.
            raise ParserSyntaxError(
                "Encountered a closing brace without a matching opening brace.",
                lines=state.lines,
                raw_line=ct_start_pos[0],
                raw_column=ct_start_pos[1],
            )
        # Parenthesized only when the innermost enclosing context is a
        # parenthesis — an f-string on top of the stack disables it.
        is_parenthesized = (len(pof_stack) > 0
                            and pof_stack[-1] == _PARENTHESIS_STACK_ENTRY)

        whitespace_after = WhitespaceState(ct_end_pos[0], ct_end_pos[1],
                                           state.indents[-1], is_parenthesized)

    # Hold onto whitespace_after, so we can use it as whitespace_before in the next
    # node.
    state.previous_whitespace_state = whitespace_after

    return Token(
        ct_type,
        ct_string,
        ct_start_pos,
        ct_end_pos,
        whitespace_before,
        whitespace_after,
        relative_indent,
    )
コード例 #8
0
def tokenize(code: str,
             version_info: PythonVersionInfo) -> Generator[Token, None, None]:
    """Split *code* into newline-preserving lines and tokenize them."""
    return tokenize_lines(split_lines(code, keepends=True), version_info)