Beispiel #1
0
def _fix_fstrings(contents_text):
    """Rewrite single-line ``'...'.format(...)`` calls found by the visitor.

    The source is parsed with ``ast_parse``; when that raises ``SyntaxError``
    the input is returned untouched.  Every string token whose offset was
    recorded by ``FindSimpleFormats`` and that is immediately followed by
    ``.format(`` is replaced by the result of ``_to_fstring`` and the call
    tokens are dropped, provided the call ends on the same line as the string.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindSimpleFormats()
    finder.visit(tree)

    tokens = src_to_tokens(contents_text)
    # Iterate over a snapshot, last token first, so that removing tokens
    # after the current index never disturbs indices still to be visited.
    for idx, tok in reversed(list(enumerate(tokens))):
        call_node = finder.found.get(Offset(tok.line, tok.utf8_byte_offset))
        if call_node is None:
            continue

        if _is_bytestring(tok.src):  # pragma: no cover (py2-only)
            continue

        # the three tokens after the string must spell `.format(` exactly
        open_paren = idx + 3
        if tokens_to_src(tokens[idx + 1:open_paren + 1]) != '.format(':
            continue

        # argument positions are irrelevant here, so `call_node` is passed
        call_end = _victims(tokens, open_paren, call_node, gen=False).ends[-1]
        # calls spanning several lines are left alone
        if tokens[call_end].line != tok.line:
            continue

        fstring = tok._replace(src=_to_fstring(tok.src, call_node))
        tokens[idx:call_end + 1] = [fstring]

    return tokens_to_src(tokens)
Beispiel #2
0
def _fix_format_literals(contents_text):
    """Rewrite string literals that are the receiver of a ``.format`` access.

    Scans the token stream for a run of ``STRING`` tokens followed by ``.``
    and ``format`` (tokens in ``NON_CODING_TOKENS`` may appear in between),
    then replaces each such run with a single ``STRING`` token holding the
    output of ``_rewrite_string_literal``.
    """
    tokens = src_to_tokens(contents_text)

    spans = []
    first = last = None
    dot_seen = False

    for idx, tok in enumerate(tokens):
        is_string = tok.name == 'STRING'
        if is_string and first is None:
            # a new run of string tokens begins here
            first, last = idx, idx + 1
        elif is_string:
            # the current run grows (implicit concatenation)
            last = idx + 1
        elif first is not None and tok.src == '.':
            dot_seen = True
        elif dot_seen and tok.src == 'format':
            spans.append((first, last))
            first, last, dot_seen = None, None, False
        elif tok.name not in NON_CODING_TOKENS:
            # any other coding token ends the candidate run
            first, last, dot_seen = None, None, False

    # replace back to front so earlier spans keep valid indices
    for first, last in reversed(spans):
        rewritten = _rewrite_string_literal(tokens_to_src(tokens[first:last]))
        tokens[first:last] = [Token('STRING', rewritten)]

    return tokens_to_src(tokens)
Beispiel #3
0
def _to_fstring(
    src: str, tokens: List[Token], args: List[Tuple[int, int]],
) -> str:
    """Build f-string source from a format string and its call arguments.

    ``args`` holds ``(start, end)`` token index pairs, one per argument.
    Keyword arguments are stored under their name, positional ones under
    their position (as a string).  Each replacement field parsed from
    ``'f' + src`` then has its leading name swapped for the matching
    argument source before the parts are unparsed again.
    """
    substitutions = {}
    positional = 0
    for arg_start, arg_end in args:
        arg_start = _skip_unimportant_ws(tokens, arg_start)
        head = tokens[arg_start]
        if head.name == 'NAME':
            eq = _skip_unimportant_ws(tokens, arg_start + 1)
            if tokens[eq].src == '=':  # keyword argument
                value_src = tokens_to_src(tokens[eq + 1:arg_end])
                substitutions[head.src] = value_src.strip()
                continue
        arg_src = tokens_to_src(tokens[arg_start:arg_end])
        substitutions[str(positional)] = arg_src.strip()
        positional += 1

    rewritten = []
    auto_index = 0
    for literal, field, spec, conv in parse_format('f' + src):
        if field is not None:
            key, dot, attrs = field.partition('.')
            if key:
                field = substitutions[key] + dot + attrs
            else:
                # auto-numbered field: named and automatic parameters may
                # be interleaved, so only these advance the counter
                field = substitutions[str(auto_index)] + dot + attrs
                auto_index += 1
        rewritten.append((literal, field, spec, conv))
    return unparse_parsed_string(rewritten)
Beispiel #4
0
def fix_file(filename: str, show_diff: bool = False, dry_run: bool = False) -> int:
    """Rewrite the ``noqa`` comments of ``filename`` based on flake8 output.

    The file is tokenized, a comment-free copy is written to a temporary
    file next to it and passed to ``_run_flake8``; the results drive
    ``_rewrite_noqa_comment`` / ``_remove_comment`` on the original tokens.

    Args:
        filename: path of the file to process.
        show_diff: print a unified diff of the change.
        dry_run: print the diff and leave the file untouched.

    Returns:
        ``1`` when the file is not valid UTF-8 or its contents changed (or
        would change under ``dry_run``); ``0`` otherwise, including when
        flake8 reports ``E999`` for the comment-free copy.
    """
    with open(filename, 'rb') as f:
        contents_bytes = f.read()

    try:
        contents_text = contents_bytes.decode()
    except UnicodeDecodeError:
        print(f'{filename} is non-utf8 (not supported)')
        return 1

    tokens = tokenize_rt.src_to_tokens(contents_text)

    tokens_no_comments = _remove_comments(tokens)
    src_no_comments = tokenize_rt.tokens_to_src(tokens_no_comments)

    # nothing was stripped, so there are no comments to rewrite
    if src_no_comments == contents_text:
        return 0

    # A NamedTemporaryFile cannot be opened a second time by name while it is
    # still open on Windows, so flake8 would be unable to read it.  Create the
    # file with mkstemp, close it, run flake8, then remove it ourselves.
    fd, path = tempfile.mkstemp(
        dir=os.path.dirname(filename),
        prefix=os.path.basename(filename),
        suffix='.py',
    )
    try:
        with open(fd, 'wb') as tmpfile:
            tmpfile.write(src_no_comments.encode())
        flake8_results = _run_flake8(path)
    finally:
        os.remove(path)

    if any('E999' in v for v in flake8_results.values()):
        print(f'{filename}: syntax error (skipping)')
        return 0

    # walk backwards so token deletions do not shift unvisited indices
    for i, token in tokenize_rt.reversed_enumerate(tokens):
        if token.name != 'COMMENT':
            continue

        if NOQA_RE.search(token.src):
            _rewrite_noqa_comment(tokens, i, flake8_results)
        elif NOQA_FILE_RE.match(token.src) and not flake8_results:
            if i == 0 or tokens[i - 1].name == 'NEWLINE':
                # the comment is first on its line: drop it and the token
                # that follows it
                del tokens[i: i + 2]
            else:
                _remove_comment(tokens, i)

    newsrc = tokenize_rt.tokens_to_src(tokens)
    if newsrc == contents_text:
        return 0

    if show_diff or dry_run:
        diff = difflib.unified_diff(
            contents_text.splitlines(keepends=True),
            newsrc.splitlines(keepends=True),
            fromfile=filename,
            tofile=filename,
        )
        print(''.join(diff), end='')
    if not dry_run:
        print(f'Rewriting {filename}')
        with open(filename, 'wb') as f:
            f.write(newsrc.encode())
    return 1
Beispiel #5
0
def fix_file(filename: str) -> int:
    """Rewrite the ``noqa`` comments of ``filename`` based on flake8 output.

    Returns ``1`` when the file is not valid UTF-8 or was rewritten, and
    ``0`` when nothing changed (no comments, a flake8 ``E999`` result, or
    identical output).
    """
    with open(filename, 'rb') as src_file:
        raw = src_file.read()

    try:
        contents_text = raw.decode()
    except UnicodeDecodeError:
        print(f'{filename} is non-utf8 (not supported)')
        return 1

    tokens = tokenize_rt.src_to_tokens(contents_text)
    stripped_src = tokenize_rt.tokens_to_src(_remove_comments(tokens))
    if stripped_src == contents_text:
        # removing comments changed nothing: no comments to look at
        return 0

    # flake8 runs on a comment-free sibling file which is always removed
    tmp_fd, tmp_path = tempfile.mkstemp(
        suffix='.py',
        prefix=os.path.basename(filename),
        dir=os.path.dirname(filename),
    )
    try:
        with open(tmp_fd, 'wb') as tmp:
            tmp.write(stripped_src.encode())
        flake8_results = _run_flake8(tmp_path)
    finally:
        os.remove(tmp_path)

    has_syntax_error = any(
        'E999' in codes for codes in flake8_results.values()
    )
    if has_syntax_error:
        print(f'{filename}: syntax error (skipping)')
        return 0

    for idx, tok in tokenize_rt.reversed_enumerate(tokens):
        if tok.name != 'COMMENT':
            continue

        if NOQA_RE.search(tok.src):
            _rewrite_noqa_comment(tokens, idx, flake8_results)
            continue

        if not NOQA_FILE_RE.match(tok.src) or flake8_results:
            continue

        after_newline = idx == 0 or tokens[idx - 1].name == 'NEWLINE'
        if after_newline:
            # drop the comment together with the token following it
            del tokens[idx:idx + 2]
        else:
            _remove_comment(tokens, idx)

    newsrc = tokenize_rt.tokens_to_src(tokens)
    if newsrc == contents_text:
        return 0

    print(f'Rewriting {filename}')
    with open(filename, 'wb') as out:
        out.write(newsrc.encode())
    return 1
Beispiel #6
0
def _replace_call(tokens, start, end, args, tmpl):
    """Replace ``tokens[start:end]`` with ``tmpl`` rendered from the call.

    ``tmpl`` is formatted with ``args`` (the stripped source of each
    argument span) and ``rest`` (the source after the first argument, with
    leading comments / unimportant whitespace skipped, up to but excluding
    the final token of the call).
    """
    arg_strs = []
    for bounds in args:
        arg_strs.append(tokens_to_src(tokens[slice(*bounds)]).strip())

    skippable = {'COMMENT', UNIMPORTANT_WS}
    # begin one token past the end of the first argument
    rest_start = args[0][1] + 1
    while rest_start < end and tokens[rest_start].name in skippable:
        rest_start += 1

    rest_src = tokens_to_src(tokens[rest_start:end - 1])
    replacement = Token('CODE', tmpl.format(args=arg_strs, rest=rest_src))
    tokens[start:end] = [replacement]
Beispiel #7
0
def _fix_percent_format_dict(tokens, start, node):
    """Rewrite ``'...' % {'k': v}`` at ``tokens[start]`` as ``.format(k=v)``.

    Bails out without touching ``tokens`` when a dict key is not a string
    literal, is duplicated, fails ``IDENT_RE``, is a Python keyword, or
    when the surrounding tokens do not have the exact expected spelling.
    """
    keys = {}
    names_seen = set()
    for key_node in node.right.keys:
        # only string keys can become keyword arguments
        if not isinstance(key_node, ast.Str):
            return
        name = key_node.s
        unusable = (
            name in names_seen or           # duplicate key
            not IDENT_RE.match(name) or     # not an identifier
            name in keyword.kwlist          # a keyword
        )
        if unusable:
            return
        names_seen.add(name)
        keys[_ast_to_offset(key_node)] = key_node

    # TODO: this is overly timid
    brace = start + 4
    if tokens_to_src(tokens[start + 1:brace + 1]) != ' % {':
        return

    brace_end = _victims(tokens, brace, node.right, gen=False).ends[-1]

    replacements = []
    for idx, tok in enumerate(tokens[brace:brace_end], brace):
        key_node = keys.pop(tok.offset, None)
        if key_node is None:
            continue
        # token found at the key's offset but its value differs
        # (implicit join?)
        if ast.literal_eval(tok.src) != key_node.s:
            return
        # anything other than the plain `'k': v` spelling is left alone
        if tokens_to_src(tokens[idx + 1:idx + 3]) != ': ':
            return
        replacements.append((idx, key_node.s))
    assert not keys, keys

    tokens[brace_end] = tokens[brace_end]._replace(src=')')
    # back to front: each replacement shrinks three tokens into one
    for idx, name in reversed(replacements):
        tokens[idx:idx + 3] = [Token('CODE', '{}='.format(name))]
    format_src = _percent_to_format(tokens[start].src)
    tokens[start] = tokens[start]._replace(src=format_src)
    tokens[start + 1:brace + 1] = [Token('CODE', '.format'), Token('OP', '(')]
def _fix_six(contents_text):
    """Replace the ``six`` usages recorded by ``FindSixUsage``.

    Simple names and ``x.attr`` accesses are swapped for their entry in
    ``SIX_SIMPLE_ATTRS``; recorded decorators are removed together with
    their ``@`` and the rest of the line.  Source that fails to parse is
    returned as is.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindSixUsage()
    finder.visit(tree)

    tokens = src_to_tokens(contents_text)
    for idx, tok in reversed_enumerate(tokens):
        offset = tok.offset
        if offset in finder.simple_names:
            name_node = finder.simple_names[offset]
            tokens[idx] = Token('CODE', SIX_SIMPLE_ATTRS[name_node.id])
        elif offset in finder.simple_attrs:
            attr_node = finder.simple_attrs[offset]
            # only the plain `name.attr` spelling is rewritten
            if (
                    tokens[idx + 1].src == '.' and
                    tokens[idx + 2].src == attr_node.attr
            ):
                replacement = Token('CODE', SIX_SIMPLE_ATTRS[attr_node.attr])
                tokens[idx:idx + 3] = [replacement]
        elif offset in finder.remove_decorators:
            if tokens[idx - 1].src == '@':
                # delete from the `@` through the end-of-line NEWLINE token
                newline = idx + 1
                while tokens[newline].name != 'NEWLINE':
                    newline += 1
                del tokens[idx - 1:newline + 1]

    return tokens_to_src(tokens)
Beispiel #9
0
def _upgrade(source: str) -> str:
    """Return ``source`` after applying the rewrites found by the visitor.

    The source is parsed, walked with ``_FindAssignment``, tokenized, and
    the token list is edited in place by ``_mutate_found`` before being
    turned back into text.
    """
    tree = _ast_parse(source)
    finder = _FindAssignment()
    finder.visit(tree)

    token_list = src_to_tokens(source)
    _mutate_found(token_list, finder)
    return tokens_to_src(token_list)
Beispiel #10
0
def parse_call_args(
    tokens: list[Token],
    i: int,
) -> tuple[list[tuple[int, int]], int]:
    """
    Split the arguments of a call into token index ranges.

    ``i`` is the index of the call's opening bracket.  The result is a list
    of ``(start, end)`` pairs, one per top-level argument, together with
    the index of the token following the matching closing bracket.
    """
    found = []
    open_braces = [i]
    pos = i + 1
    current_start = pos

    while open_braces:
        src = tokens[pos].src

        if src == "," and len(open_braces) == 1:
            # a comma directly inside the call closes the current argument
            found.append((current_start, pos))
            current_start = pos + 1
        elif src in BRACES:
            open_braces.append(pos)
        elif src == BRACES[tokens[open_braces[-1]].src]:
            open_braces.pop()
            # the call just closed: keep the final argument unless it is
            # empty (e.g. after a trailing comma)
            if not open_braces:
                last_arg = tokens_to_src(tokens[current_start:pos])
                if last_arg.strip():
                    found.append((current_start, pos))

        pos += 1

    return found, pos
Beispiel #11
0
def _has_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """
    Detect a trailing semicolon in a cell and blank it out.

    Parameters
    ----------
    src
        Notebook cell source.

    Returns
    -------
    str
        The source with the trailing semicolon removed, or ``src`` itself
        when there is none.
    bool
        Whether a trailing semicolon was found.
    """
    tokens = tokenize_rt.src_to_tokens(src)
    # last token that is neither blank (spaces / newlines only) nor a comment
    significant = (
        (idx, token)
        for idx, token in tokenize_rt.reversed_enumerate(tokens)
        if token.src.strip(" \n") and token.name != "COMMENT"
    )
    last = next(significant, None)
    if last is None:
        return src, False

    idx, token = last
    if token.name != "OP" or token.src != ";":
        return src, False

    tokens[idx] = token._replace(src="")
    return tokenize_rt.tokens_to_src(tokens), True
Beispiel #12
0
def replace_call(
        tokens: List[Token],
        start: int,
        end: int,
        args: List[Tuple[int, int]],
        tmpl: str,
        *,
        parens: Sequence[int] = (),
) -> None:
    """Replace ``tokens[start:end]`` with ``tmpl`` rendered from the call.

    ``tmpl`` is formatted with ``args`` (the source of each argument, the
    ones indexed by ``parens`` wrapped in parentheses) and ``rest`` (the
    source following the first argument, without leading comments /
    whitespace and without commas directly before the closing token).
    """
    rendered = [arg_str(tokens, *bounds) for bounds in args]
    for idx in parens:
        rendered[idx] = f'({rendered[idx]})'

    skippable = {'COMMENT', UNIMPORTANT_WS}
    # begin one token past the end of the first argument
    rest_start = args[0][1] + 1
    while rest_start < end and tokens[rest_start].name in skippable:
        rest_start += 1

    # step back over commas sitting right before the final token
    rest_end = end - 1
    while True:
        previous = tokens[rest_end - 1]
        if previous.name != 'OP' or previous.src != ',':
            break
        rest_end -= 1

    rest_src = tokens_to_src(tokens[rest_start:rest_end])
    replacement = Token('CODE', tmpl.format(args=rendered, rest=rest_src))
    tokens[start:end] = [replacement]
Beispiel #13
0
def parse_call_args(
    tokens: List[Token],
    i: int,
) -> Tuple[List[Tuple[int, int]], int]:
    """Return the argument spans of the call whose opening bracket is at ``i``.

    Each span is a ``(start, end)`` pair of token indices.  The second
    element of the result is the index just past the closing bracket.
    """
    spans = []
    pending = [i]  # indices of brackets that are still open
    cursor = i + 1
    span_start = cursor

    while pending:
        text = tokens[cursor].src
        at_top_level = len(pending) == 1

        if at_top_level and text == ',':
            spans.append((span_start, cursor))
            span_start = cursor + 1
        elif text in BRACES:
            pending.append(cursor)
        elif text == BRACES[tokens[pending[-1]].src]:
            pending.pop()
            # on the closing bracket of the call itself, record the last
            # argument when it is not empty
            closed_call = not pending
            if closed_call and tokens_to_src(
                    tokens[span_start:cursor],
            ).strip():
                spans.append((span_start, cursor))

        cursor += 1

    return spans, cursor
def decode(b, errors='strict'):
    """Decode UTF-8 bytes and rewrite runs of strings containing f-strings.

    Adjacent ``STRING`` tokens (separated only by non-coding tokens) form a
    run; runs in which ``_is_f`` is true for at least one token are handed
    to ``_make_fstring``.  Returns ``(source, length)`` where ``length``
    comes from the UTF-8 decoder.  A ``TokenSyntaxError`` is re-raised as a
    ``SyntaxError`` showing the offending line with a caret.
    """
    import tokenize_rt  # pip install future-fstrings[rewrite]

    u, length = utf_8.decode(b, errors)
    tokens = tokenize_rt.src_to_tokens(u)

    runs = []
    run_start = run_end = has_f = None

    for idx, tok in enumerate(tokens):
        is_string = tok.name == 'STRING'
        if run_start is None:
            if is_string:
                run_start, run_end = idx, idx + 1
                has_f = _is_f(tok)
        elif is_string:
            run_end = idx + 1
            has_f |= _is_f(tok)
        elif tok.name not in tokenize_rt.NON_CODING_TOKENS:
            # a coding token terminates the run
            if has_f:
                runs.append((run_start, run_end))
            run_start = run_end = has_f = None

    # last run first, so earlier indices stay valid
    for run_start, run_end in reversed(runs):
        try:
            replacement = _make_fstring(tokens[run_start:run_end])
            tokens[run_start:run_end] = replacement
        except TokenSyntaxError as e:
            msg = str(e.e)
            bad = e.token
            line = u.splitlines()[bad.line - 1]
            # the token offset is in bytes: convert it to a character column
            before = line.encode('UTF-8')[:bad.utf8_byte_offset]
            caret = ' ' * len(before.decode('UTF-8')) + '^'
            raise SyntaxError(msg + '\n\n' + line + '\n' + caret)
    return tokenize_rt.tokens_to_src(tokens), length
Beispiel #15
0
def _fix_percent_format(contents_text):
    """Rewrite the ``%``-format expressions recorded by ``FindPercentFormats``.

    Tuple and dict right-hand sides are delegated to
    ``_fix_percent_format_tuple`` / ``_fix_percent_format_dict``.  The input
    is returned unchanged when it does not parse or nothing was found.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindPercentFormats()
    finder.visit(tree)
    if not finder.found:
        return contents_text

    tokens = src_to_tokens(contents_text)

    for idx, tok in reversed_enumerate(tokens):
        node = finder.found.get(tok.offset)
        if node is None:
            continue

        # bytestrings have no .format() equivalent in py3; this check only
        # matters when running under python2
        if _is_bytestring(tokens[idx].src):  # pragma: no cover (py2-only)
            continue

        right = node.right
        if isinstance(right, ast.Tuple):
            _fix_percent_format_tuple(tokens, idx, node)
        elif isinstance(right, ast.Dict):
            _fix_percent_format_dict(tokens, idx, node)

    return tokens_to_src(tokens)
Beispiel #16
0
def _fix_plugins(contents_text: str, settings: Settings) -> str:
    """Run the token callbacks collected by ``visit`` over the source.

    Returns ``contents_text`` unchanged when it does not parse, when no
    callbacks were collected, or when tokenizing raises ``TokenError``.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    callbacks = visit(FUNCS, tree, settings)
    if not callbacks:
        return contents_text

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:  # pragma: no cover (bpo-2180)
        return contents_text

    _fixup_dedent_tokens(tokens)

    for idx, tok in reversed_enumerate(tokens):
        # tokens with empty source are skipped
        if tok.src:
            # `callbacks` is a defaultdict, but `.get()` makes this
            # function's self time almost 50% faster
            for fix in callbacks.get(tok.offset, ()):
                fix(idx, tokens)

    return tokens_to_src(tokens)
Beispiel #17
0
def _fix_src(contents_text: str, min_version: Tuple[int, ...]) -> str:
    """Apply the collected callbacks and brace fixes to the source.

    Each non-empty token first receives the callbacks registered for its
    offset; tokens whose source is in ``START_BRACES`` are additionally
    passed through ``fix_brace``.  Unparseable input is returned as is.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    callbacks = visit(FUNCS, tree, min_version)

    tokens = src_to_tokens(contents_text)
    for idx, tok in _changing_list(tokens):
        # zero-length tokens such as DEDENT carry nothing to fix
        if tok.src:
            # `callbacks` is a defaultdict, but `.get()` makes this
            # function's self time almost 50% faster
            for fix in callbacks.get(tok.offset, ()):
                fix(idx, tokens)

            if tok.src in START_BRACES:
                simple = find_simple(idx, tokens)
                fix_brace(
                    tokens,
                    simple,
                    add_comma=False,
                    remove_comma=False,
                )

    return tokens_to_src(tokens)
def remove_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """Strip the trailing semicolon of a Jupyter notebook cell, if any.

    Given

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    the result is

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    The second element of the returned pair tells whether a semicolon was
    removed; when it is ``False`` the first element is ``src`` itself.

    This mirrors `quiet` from `IPython.core.displayhook`, but works on
    ``tokenize_rt`` tokens so that the rest of the source round-trips.
    """
    from tokenize_rt import (
        reversed_enumerate,
        src_to_tokens,
        tokens_to_src,
    )

    tokens = src_to_tokens(src)
    for idx, token in reversed_enumerate(tokens):
        if token.name in TOKENS_TO_IGNORE:
            continue
        # first token from the end that is not ignored decides the outcome
        if token.name == "OP" and token.src == ";":
            del tokens[idx]
            return tokens_to_src(tokens), True
        break
    return src, False
Beispiel #19
0
def _fix_tokens(contents_text: str, min_version: Version) -> str:
    """Apply the token-level fixes and return the rewritten source.

    Tokens are visited last to first and dispatched on their name / source
    to the matching ``_fix_*`` helper.  If tokenizing raises
    ``tokenize.TokenError`` the input is returned unchanged; otherwise the
    result has its leading whitespace stripped.
    """
    # `u` prefixes are removed when targeting py3+ or when the helper reports
    # a `unicode_literals` future import
    remove_u = (min_version >= (3, )
                or _imports_future(contents_text, 'unicode_literals'))

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:
        return contents_text
    # iterate backwards so edits at or after `i` leave earlier indices valid
    for i, token in reversed_enumerate(tokens):
        if token.name == 'NUMBER':
            tokens[i] = token._replace(src=_fix_long(_fix_octal(token.src)))
        elif token.name == 'STRING':
            # each helper returns a (possibly new) token for slot `i`
            tokens[i] = _fix_ur_literals(tokens[i])
            if remove_u:
                tokens[i] = _remove_u_prefix(tokens[i])
            tokens[i] = _fix_escape_sequences(tokens[i])
        elif token.src == '(':
            _fix_extraneous_parens(tokens, i)
        elif token.src == 'format' and i > 0 and tokens[i - 1].src == '.':
            # `i - 2` is the token before the `.` (presumably the string
            # literal being formatted — confirm against _fix_format_literal)
            _fix_format_literal(tokens, i - 2)
        elif token.src == 'encode' and i > 0 and tokens[i - 1].src == '.':
            _fix_encode_to_binary(tokens, i)
        elif (min_version >= (3, ) and token.utf8_byte_offset == 0
              and token.line < 3 and token.name == 'COMMENT'
              and tokenize.cookie_re.match(token.src)):
            # encoding cookie comment in the first two lines: remove the
            # comment and the NL token that follows it
            del tokens[i]
            assert tokens[i].name == 'NL', tokens[i].name
            del tokens[i]
        elif token.src == 'from' and token.utf8_byte_offset == 0:
            # a `from` at the start of a line
            _fix_import_removals(tokens, i, min_version)
    return tokens_to_src(tokens).lstrip()
Beispiel #20
0
def _fix_py2_compatible(contents_text):
    """Apply the rewrites recorded by ``Py2CompatibleVisitor``.

    Tokens whose offset was recorded as a dict, empty-set literal, set or
    ``is``-literal site are passed to the matching ``_process_*`` helper.
    The input is returned untouched when it does not parse or when the
    visitor recorded nothing.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = Py2CompatibleVisitor()
    finder.visit(tree)

    recorded = [
        finder.dicts,
        finder.sets,
        finder.set_empty_literals,
        finder.is_literal,
    ]
    if not any(recorded):
        return contents_text

    tokens = src_to_tokens(contents_text)
    for idx, tok in reversed_enumerate(tokens):
        offset = tok.offset
        if offset in finder.dicts:
            _process_dict_comp(tokens, idx, finder.dicts[offset])
        elif offset in finder.set_empty_literals:
            _process_set_empty_literal(tokens, idx)
        elif offset in finder.sets:
            _process_set_literal(tokens, idx, finder.sets[offset])
        elif offset in finder.is_literal:
            _process_is_literal(tokens, idx, finder.is_literal[offset])
    return tokens_to_src(tokens)
Beispiel #21
0
def _fix_percent_format_dict(
    i: int,
    tokens: List[Token],
    *,
    node_right: ast.Dict,
) -> None:
    """Rewrite ``'...' % {'k': v}`` at ``tokens[i]`` as ``.format(k=v)``.

    Nothing is changed when the string contains ``\\N``, when a dict key is
    not a string literal, is duplicated, is not an identifier, is in
    ``KEYWORDS``, or when the tokens are not spelled exactly as expected.
    """
    # TODO: handle \N escape sequences
    if r'\N' in tokens[i].src:
        return

    names_seen: Set[str] = set()
    keys = {}

    for key_node in node_right.keys:
        # only string keys can become keyword arguments
        if not isinstance(key_node, ast.Str):
            return
        name = key_node.s
        unusable = (
            name in names_seen or           # duplicate key
            not name.isidentifier() or      # not an identifier
            name in KEYWORDS                # a keyword
        )
        if unusable:
            return
        names_seen.add(name)
        keys[ast_to_offset(key_node)] = key_node

    # TODO: this is overly timid
    brace = i + 4
    if tokens_to_src(tokens[i + 1:brace + 1]) != ' % {':
        return

    brace_end = victims(tokens, brace, node_right, gen=False).ends[-1]

    replacements = []
    for j, token in enumerate(tokens[brace:brace_end], brace):
        key_node = keys.pop(token.offset, None)
        if key_node is None:
            continue
        # token found at the key's offset but its value differs
        # (implicit join?)
        if ast.literal_eval(token.src) != key_node.s:
            return
        # anything other than the plain `'key': value` spelling is skipped
        if tokens[j + 1].src != ':' or tokens[j + 2].src != ' ':
            return
        replacements.append((j, key_node.s))
    assert not keys, keys

    tokens[brace_end] = tokens[brace_end]._replace(src=')')
    # back to front: each replacement shrinks three tokens into one
    for j, name in reversed(replacements):
        tokens[j:j + 3] = [Token('CODE', f'{name}=')]
    format_src = _percent_to_format(tokens[i].src)
    tokens[i] = tokens[i]._replace(src=format_src)
    tokens[i + 1:brace + 1] = [Token('CODE', '.format'), Token('OP', '(')]
Beispiel #22
0
Datei: t.py Projekt: asottile/t
def _fix_calls(contents_text: str) -> str:
    """Rewrite the calls recorded by ``Visitor`` and return the new source.

    For every recorded call the tokens between the call's opening ``(`` and
    the last bracket group opened directly inside it are deleted, as are
    the tokens between that group's closing bracket and the call's closing
    bracket; indentation inside the call is reduced by four characters.

    The input is returned unchanged when it does not parse, when no calls
    were recorded, or when tokenizing raises ``TokenError``.
    """
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    visitor = Visitor()
    visitor.visit(ast_obj)

    if not visitor.calls:
        return contents_text

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:  # pragma: no cover (bpo-2180)
        return contents_text

    # NOTE: `i` is reassigned freely below; the generator supplies a fresh
    # value on every iteration of this outer loop
    for i, token in reversed_enumerate(tokens):
        if token.offset in visitor.calls:
            # handle each recorded offset only once
            visitor.calls.discard(token.offset)

            # search forward for the opening brace
            while tokens[i].src != '(':
                i += 1

            call_start = i
            i += 1
            brace_depth = 1
            # index of the last bracket opened directly inside the call, and
            # of the last bracket that brought the depth back to 1
            start = -1
            end = -1

            while brace_depth:
                if tokens[i].src in {'(', '{', '['}:
                    if brace_depth == 1:
                        start = i
                    brace_depth += 1
                elif tokens[i].src in {')', '}', ']'}:
                    brace_depth -= 1
                    if brace_depth == 1:
                        end = i
                i += 1

            # the call must contain at least one nested bracket group
            assert start != -1
            assert end != -1
            # `i` is one past the call's closing bracket
            call_end = i - 1

            # dedent everything inside the brackets
            for i in range(call_start, call_end):
                if (tokens[i - 1].name == 'NL'
                        and tokens[i].name == UNIMPORTANT_WS):
                    # drop the first four characters of the indentation
                    tokens[i] = tokens[i]._replace(src=tokens[i].src[4:])

            # delete the later range first so `call_start` / `start` remain
            # valid for the second deletion
            del tokens[end + 1:call_end]
            del tokens[call_start + 1:start]

    return tokens_to_src(tokens)
Beispiel #23
0
def fix_file(filename):
    """Rewrite the ``noqa`` comments of ``filename`` based on flake8 output.

    The file is tokenized, a comment-free copy is written to a temporary
    file next to it and passed to ``_run_flake8``; the results drive
    ``_rewrite_noqa_comment`` / ``_remove_comment`` on the original tokens.

    Returns ``1`` when the file is not valid UTF-8 or was rewritten, ``0``
    when its contents did not change.
    """
    with open(filename, 'rb') as f:
        contents_bytes = f.read()

    try:
        contents_text = contents_bytes.decode('UTF-8')
    except UnicodeDecodeError:
        print('{} is non-utf8 (not supported)'.format(filename))
        return 1

    tokens = tokenize_rt.src_to_tokens(contents_text)

    tokens_no_comments = _remove_comments(tokens)
    src_no_comments = tokenize_rt.tokens_to_src(tokens_no_comments)

    # A NamedTemporaryFile cannot be opened a second time by name while it is
    # still open on Windows, so flake8 would be unable to read it.  Create the
    # file with mkstemp, close it, run flake8, then remove it ourselves.
    fd, path = tempfile.mkstemp(
        dir=os.path.dirname(filename),
        prefix=os.path.basename(filename),
        suffix='.py',
    )
    try:
        with os.fdopen(fd, 'wb') as tmpfile:
            tmpfile.write(src_no_comments.encode('UTF-8'))
        flake8_results = _run_flake8(path)
    finally:
        os.remove(path)

    # iterate over a snapshot, last token first, so deletions do not shift
    # the indices still to be visited
    for i, token in reversed(tuple(enumerate(tokens))):
        if token.name != 'COMMENT':
            continue

        if NOQA_RE.search(token.src):
            _rewrite_noqa_comment(tokens, i, flake8_results)
        elif NOQA_FILE_RE.match(token.src) and not flake8_results:
            if i == 0 or tokens[i - 1].name == 'NEWLINE':
                # the comment is first on its line: drop it and the token
                # that follows it
                del tokens[i: i + 2]
            else:
                _remove_comment(tokens, i)

    newsrc = tokenize_rt.tokens_to_src(tokens)
    if newsrc != contents_text:
        print('Rewriting {}'.format(filename))
        with open(filename, 'wb') as f:
            f.write(newsrc.encode('UTF-8'))
        return 1
    else:
        return 0
Beispiel #24
0
def decode(b, errors="strict"):
    """Decode UTF-8 bytes and rewrite the f-string token runs in the result.

    Returns ``(source, length)`` where ``length`` is the value reported by
    ``utf_8.decode``.  A ``TokenSyntaxError`` raised by ``_make_fstring`` is
    converted into a ``SyntaxError`` that shows the offending line with a
    caret under the token's column.
    """

    non_coding_tokens = frozenset(
        ("COMMENT", tokenize_rt.ESCAPED_NL, "NL", tokenize_rt.UNIMPORTANT_WS))

    u, length = utf_8.decode(b, errors)
    tokens = tokenize_rt.src_to_tokens(u)

    # (start, end) index pairs of token runs to hand to _make_fstring
    to_replace = []

    # index of the first token of the current run, or -1 when not in a run
    started = -1
    # index of the most recent token for which fstr() was false
    end = -1

    # one extra iteration (token is None) flushes a run that reaches the
    # end of the token list
    for i in range(0, 1 + len(tokens)):
        if i < len(tokens):
            token = tokens[i]
        else:
            token = None

        if token:
            if fstr(token):
                if started < 0:
                    started = i
                continue
            end = i

        if started >= 0:
            # keep the run open while the helpers report a following
            # (f-)string — NOTE(review): exact lookahead semantics live in
            # peek_is_fstr / peek_is_str, confirm there
            if peek_is_fstr(tokens, i + 1):
                continue
            if peek_is_str(tokens, i + 1):
                continue

            if token is None:
                pass
            elif token.name in non_coding_tokens or peek_is_str(tokens, i):
                # multiline f-string + str: keep extending the run
                continue

            to_replace.append((started, end))
            started = -1

    # last run first, so replacing a run does not shift earlier indices
    for start, end in reversed(to_replace):
        if end - start > 1:
            # move the end back by one when the run finishes on a
            # non-coding token (multiline f-strings)
            if tokens[end - 1].name in non_coding_tokens:
                end -= 1
        try:
            tokens[start:end] = _make_fstring(tokens[start:end])
        except TokenSyntaxError as e:
            msg = str(e.e)
            line = u.splitlines()[e.token.line - 1]
            # the token offset is in bytes: convert it to a character column
            bts = line.encode("UTF-8")[:e.token.utf8_byte_offset]
            indent = len(bts.decode("UTF-8"))
            raise SyntaxError(msg + "\n\n" + line + "\n" + " " * indent + "^")
    return tokenize_rt.tokens_to_src(tokens), length
Beispiel #25
0
def decode(b, errors='strict'):
    """Decode UTF-8 bytes and wrap every number literal in ``blurse(...)``.

    Returns ``(source, length)`` where ``length`` is the value reported by
    ``utf_8.decode``.
    """
    u, length = utf_8.decode(b, errors)

    rewritten = []
    for tok in tokenize_rt.src_to_tokens(u):
        if tok.name != 'NUMBER':
            rewritten.append(tok)
            continue
        # the wrapped literal is tokenized on its own and spliced in
        wrapped = "blurse({})".format(tok.src)
        rewritten.extend(tokenize_rt.src_to_tokens(wrapped))

    return tokenize_rt.tokens_to_src(rewritten), length
Beispiel #26
0
def _fix_octal_literals(contents_text):
    """Rewrite legacy octal literals such as ``0755`` as ``0o755``.

    Only ``NUMBER`` tokens made solely of digits, starting with ``0`` and
    not consisting entirely of zeros are changed.
    """
    def _as_prefixed_octal(literal):
        is_legacy_octal = (
            literal.startswith('0') and
            literal.isdigit() and
            literal != '0' * len(literal)
        )
        if is_legacy_octal:  # pragma: no cover (py2 only)
            return '0o' + literal[1:]
        return literal

    tokens = src_to_tokens(contents_text)
    for idx, tok in enumerate(tokens):
        if tok.name == 'NUMBER':
            tokens[idx] = tok._replace(src=_as_prefixed_octal(tok.src))
    return tokens_to_src(tokens)
Beispiel #27
0
def _fix_unicode_literals(contents_text, py3_plus):
    """Drop ``u`` / ``U`` from the prefix of every string literal.

    Nothing is changed unless ``py3_plus`` is set or
    ``_imports_unicode_literals`` reports true for the source.
    """
    if not (py3_plus or _imports_unicode_literals(contents_text)):
        return contents_text

    tokens = src_to_tokens(contents_text)
    for idx, tok in enumerate(tokens):
        if tok.name == 'STRING':
            # group 1 is the literal's prefix, group 2 everything after it
            prefix, rest = STRING_PREFIXES_RE.match(tok.src).group(1, 2)
            stripped_prefix = prefix.replace('u', '').replace('U', '')
            tokens[idx] = Token('STRING', stripped_prefix + rest)
    return tokens_to_src(tokens)
Beispiel #28
0
def _fix_open_mode(i: int, tokens: List[Token]) -> None:
    """Fix the mode argument (second argument) of the call found from ``i``.

    Depending on which of ``U_MODE_REMOVE``, ``U_MODE_REPLACE_R`` or
    ``U_MODE_REMOVE_U`` contains the unquoted mode, the argument is deleted,
    has ``U`` replaced by ``r``, or has ``U`` removed.  Any other mode
    raises ``AssertionError``.
    """
    func_args, _ = parse_call_args(tokens, find_open_paren(tokens, i))
    mode_span = slice(*func_args[1])
    mode = tokens_to_src(tokens[mode_span])
    # surrounding whitespace and quote characters are ignored for the lookup
    bare_mode = mode.strip().strip('"\'')

    if bare_mode in U_MODE_REMOVE:
        # delete from the end of the first argument to the end of the mode
        del tokens[func_args[0][1]:func_args[1][1]]
    elif bare_mode in U_MODE_REPLACE_R:
        tokens[mode_span] = [Token('SRC', mode.replace('U', 'r'))]
    elif bare_mode in U_MODE_REMOVE_U:
        tokens[mode_span] = [Token('SRC', mode.replace('U', ''))]
    else:
        raise AssertionError(f'unreachable: {mode!r}')
def decode(b: bytes, errors: str = 'strict') -> Tuple[str, int]:
    """Decode UTF-8 bytes and turn the recorded expressions into strings.

    The encoding cookie in the first two lines is substituted with
    ``_new_coding_cookie``.  Every token whose offset is in the visitor's
    ``offsets`` then starts a span that is replaced by a single ``STRING``
    token holding the ``repr`` of the span's source.  Returns the new
    source together with the length reported by the UTF-8 decoder.
    """
    u, length = utf_8.decode(b, errors)

    # replace the encoding cookie so there isn't a recursion problem
    lines = u.splitlines(True)
    for idx in range(min(2, len(lines))):
        lines[idx] = tokenize.cookie_re.sub(_new_coding_cookie, lines[idx])
    u = ''.join(lines)

    visitor = Visitor()
    visitor.visit(_ast_parse(u))

    openers = {'(', '{', '['}
    closers = {')', '}', ']'}
    terminators = {':', ',', '=', ')', '\n'}

    tokens = tokenize_rt.src_to_tokens(u)
    for i, token in tokenize_rt.reversed_enumerate(tokens):
        if token.offset not in visitor.offsets:
            continue

        # scan forward to the first `:`, `,`, `=`, `)` or newline that is
        # not nested inside brackets
        depth = 0
        stop = i + 1
        while depth or tokens[stop].src not in terminators:
            src = tokens[stop].src
            if src in openers:
                depth += 1
            elif src in closers:
                depth -= 1
            stop += 1

        # step back over trailing whitespace / comments / etc.
        last = stop - 1
        while tokens[last].name in tokenize_rt.NON_CODING_TOKENS:
            last -= 1

        quoted = repr(tokenize_rt.tokens_to_src(tokens[i:last + 1]))
        tokens[i:last + 1] = [tokenize_rt.Token('STRING', quoted)]

    return tokenize_rt.tokens_to_src(tokens), length
Beispiel #30
0
def _fix_percent_format_tuple(tokens, start, node):
    """Rewrite ``'...' % (a, b)`` at ``tokens[start]`` as ``.format(a, b)``.

    Nothing is changed unless the string is followed by exactly ``' % ('``.
    """
    # TODO: this is overly timid
    paren = start + 4
    if tokens_to_src(tokens[start + 1:paren + 1]) != ' % (':
        return

    braces = _victims(tokens, paren, node.right, gen=False)
    # the last closing brace is kept
    braces.ends.pop()

    # highest index first, as in the original traversal order
    for brace_index in reversed(braces.starts + braces.ends):
        _remove_brace(tokens, brace_index)

    string_token = tokens[start]
    format_src = _percent_to_format(string_token.src)
    tokens[start] = string_token._replace(src=format_src)
    tokens[start + 1:paren] = [Token('Format', '.format'), Token('OP', '(')]