def _fix_format_literal(tokens: List[Token], end: int) -> None:
    """Rewrite the format-string literal ending at ``end`` in place.

    Each string part located by ``rfind_string_parts`` is parsed with
    ``parse_format``.  The rewrite is applied only when, across all parts,
    every replacement field has a key that is an integer and those keys
    count up 0, 1, 2, ... in order, and every format spec is present and
    contains no nested ``{``.  If any part fails a check, ``tokens`` is left
    completely untouched.

    On success every parsed segment is passed through ``_remove_fmt``
    (defined elsewhere -- presumably it drops the explicit positional key)
    and the token source is regenerated with ``unparse_parsed_string``.

    Args:
        tokens: token list; matching string tokens are replaced in place.
        end: index handed to ``rfind_string_parts`` to locate the literal.
    """
    string_indices = rfind_string_parts(tokens, end)
    rewritten = []
    # Key the next replacement field must carry; shared across all parts.
    expected_index = 0

    for idx in string_indices:
        source = tokens[idx].src

        # Rewriting f'foo {0}'.format(...) would produce a SyntaxError.
        prefix, _ = parse_string_literal(source)
        if 'f' in prefix.lower():
            return

        try:
            segments = parse_format(source)
        except ValueError:
            # Malformed format literal: leave everything alone.
            return

        # The final segment is always the tail of the string rather than a
        # replacement field, so slicing it off avoids its `None` key.
        for _, field_key, spec, _ in segments[:-1]:
            if field_key is None or not inty(field_key):
                return
            if int(field_key) != expected_index:
                return
            if spec is None or '{' in spec:
                return
            expected_index += 1

        rewritten.append(tuple(_remove_fmt(segment) for segment in segments))

    # Only mutate once every part has been validated.
    for idx, segments in zip(string_indices, rewritten):
        new_src = unparse_parsed_string(segments)
        tokens[idx] = tokens[idx]._replace(src=new_src)
def _fix_encode_to_binary(tokens: List[Token], i: int) -> None:
    """Turn ``'literal'.encode(...)`` into a bytes literal, in place.

    Two call shapes are recognised after ``tokens[i]``: ``()`` and
    ``(<STRING>)``.  For the second shape the argument must evaluate to a
    codec that ``is_codec`` identifies as ascii, utf-8 or iso8859-1;
    anything else aborts.  The string parts found by ``rfind_string_parts``
    at ``i - 2`` must all be ASCII, must not be f-strings, and must not use
    ``\\u``, ``\\U`` or ``\\N`` escapes; ``\\x`` escapes are accepted only
    for the iso8859-1 case.

    When every check passes, each part gets a ``b`` prefix (with any
    ``u``/``U`` removed) and the token slice starting at ``i - 1`` through
    the closing parenthesis is deleted.  Otherwise ``tokens`` is unchanged.

    Args:
        tokens: token list, mutated in place on success.
        i: index of the token followed by the call parentheses --
            presumably the ``encode`` name; confirm against the caller.
    """
    string_indices = rfind_string_parts(tokens, i - 2)
    if not string_indices:
        return

    # Both supported shapes need an opening paren right after tokens[i]
    # and at least one more token following it.
    if i + 2 >= len(tokens) or tokens[i + 1].src != '(':
        return

    if tokens[i + 2].src == ')':
        # .encode()
        victims = slice(i - 1, i + 3)
        latin1_ok = False
    elif (
            i + 3 < len(tokens) and
            tokens[i + 2].name == 'STRING' and
            tokens[i + 3].src == ')'
    ):
        # .encode('encoding')
        victims = slice(i - 1, i + 4)
        enc_prefix, enc_body = parse_string_literal(tokens[i + 2].src)
        if 'f' in enc_prefix.lower():
            return
        encoding = ast.literal_eval(enc_prefix + enc_body)
        ascii_compatible = (
            is_codec(encoding, 'ascii') or is_codec(encoding, 'utf-8')
        )
        if not ascii_compatible and not is_codec(encoding, 'iso8859-1'):
            return
        # \x escapes are only tolerated for the iso8859-1 codec.
        latin1_ok = not ascii_compatible
    else:
        return

    # Escapes that rule out the rewrite regardless of codec.
    always_rejected = {'\\u', '\\U', '\\N'}

    # First pass: validate every part before touching any token.
    for part in string_indices:
        prefix, rest = parse_string_literal(tokens[part].src)
        escapes = set(ESCAPE_RE.findall(rest))
        if not is_ascii(rest):
            return
        if escapes & always_rejected:
            return
        if '\\x' in escapes and not latin1_ok:
            return
        if 'f' in prefix.lower():
            return

    # Second pass: swap in the bytes prefix, then drop the call tokens.
    for part in string_indices:
        prefix, rest = parse_string_literal(tokens[part].src)
        bytes_prefix = 'b' + prefix.replace('u', '').replace('U', '')
        tokens[part] = tokens[part]._replace(src=bytes_prefix + rest)
    del tokens[victims]
def test_rfind_string_parts_parenthesized(src, n, expected_i):
    """Searching from token ``n`` of ``src`` yields exactly ``(expected_i,)``.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this view.
    """
    toks = src_to_tokens(src)
    found = rfind_string_parts(toks, n)
    assert found == (expected_i,)
def test_rfind_string_parts_end_of_call_looks_like_string(src, n):
    """Searching from token ``n`` of ``src`` finds no string parts.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this view.
    """
    toks = src_to_tokens(src)
    found = rfind_string_parts(toks, n)
    assert found == ()
def test_rfind_string_parts_not_a_string():
    """The source ``print`` produces no string parts at index 0."""
    toks = src_to_tokens('print')
    found = rfind_string_parts(toks, 0)
    assert found == ()
def test_rfind_string_parts_multiple_tokens(src, n, expected):
    """Searching from token ``n`` of ``src`` returns the ``expected`` result.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this view.
    """
    toks = src_to_tokens(src)
    found = rfind_string_parts(toks, n)
    assert found == expected
def test_rfind_string_parts_only_literal(src):
    """Searching from index 0 of ``src`` returns just that first token.

    ``src`` is presumably supplied by a parametrize decorator that is not
    visible in this view.
    """
    toks = src_to_tokens(src)
    found = rfind_string_parts(toks, 0)
    assert found == (0,)