def _fix_fstrings(contents_text):
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    visitor = FindSimpleFormats()
    visitor.visit(ast_obj)

    tokens = src_to_tokens(contents_text)
    for i, token in reversed(tuple(enumerate(tokens))):
        node = visitor.found.get(Offset(token.line, token.utf8_byte_offset))
        if node is None:
            continue

        if _is_bytestring(token.src):  # pragma: no cover (py2-only)
            continue

        paren = i + 3
        if tokens_to_src(tokens[i + 1:paren + 1]) != '.format(':
            continue

        # we don't actually care about arg position, so we pass `node`
        victims = _victims(tokens, paren, node, gen=False)
        end = victims.ends[-1]
        # if it spans more than one line, bail
        if tokens[end].line != token.line:
            continue

        tokens[i] = token._replace(src=_to_fstring(token.src, node))
        del tokens[i + 1:end + 1]

    return tokens_to_src(tokens)
def _fix_format_literals(contents_text):
    tokens = src_to_tokens(contents_text)

    to_replace = []
    string_start = None
    string_end = None
    seen_dot = False

    for i, token in enumerate(tokens):
        if string_start is None and token.name == 'STRING':
            string_start = i
            string_end = i + 1
        elif string_start is not None and token.name == 'STRING':
            string_end = i + 1
        elif string_start is not None and token.src == '.':
            seen_dot = True
        elif seen_dot and token.src == 'format':
            to_replace.append((string_start, string_end))
            string_start, string_end, seen_dot = None, None, False
        elif token.name not in NON_CODING_TOKENS:
            string_start, string_end, seen_dot = None, None, False

    for start, end in reversed(to_replace):
        src = tokens_to_src(tokens[start:end])
        new_src = _rewrite_string_literal(src)
        tokens[start:end] = [Token('STRING', new_src)]

    return tokens_to_src(tokens)
def _to_fstring(
        src: str, tokens: List[Token], args: List[Tuple[int, int]],
) -> str:
    params = {}
    i = 0
    for start, end in args:
        start = _skip_unimportant_ws(tokens, start)
        if tokens[start].name == 'NAME':
            after = _skip_unimportant_ws(tokens, start + 1)
            if tokens[after].src == '=':  # keyword argument
                params[tokens[start].src] = tokens_to_src(
                    tokens[after + 1:end],
                ).strip()
                continue
        params[str(i)] = tokens_to_src(tokens[start:end]).strip()
        i += 1

    parts = []
    i = 0
    for s, name, spec, conv in parse_format('f' + src):
        if name is not None:
            k, dot, rest = name.partition('.')
            name = ''.join((params[k or str(i)], dot, rest))
            if not k:  # named and auto params can be in different orders
                i += 1
        parts.append((s, name, spec, conv))
    return unparse_parsed_string(parts)
def fix_file(filename: str, show_diff: bool = False, dry_run: bool = False) -> int:
    with open(filename, 'rb') as f:
        contents_bytes = f.read()

    try:
        contents_text = contents_bytes.decode()
    except UnicodeDecodeError:
        print(f'{filename} is non-utf8 (not supported)')
        return 1

    tokens = tokenize_rt.src_to_tokens(contents_text)

    tokens_no_comments = _remove_comments(tokens)
    src_no_comments = tokenize_rt.tokens_to_src(tokens_no_comments)

    if src_no_comments == contents_text:
        return 0

    with tempfile.NamedTemporaryFile(
        dir=os.path.dirname(filename),
        prefix=os.path.basename(filename),
        suffix='.py',
    ) as tmpfile:
        tmpfile.write(src_no_comments.encode())
        tmpfile.flush()
        flake8_results = _run_flake8(tmpfile.name)

    if any('E999' in v for v in flake8_results.values()):
        print(f'{filename}: syntax error (skipping)')
        return 0

    for i, token in tokenize_rt.reversed_enumerate(tokens):
        if token.name != 'COMMENT':
            continue

        if NOQA_RE.search(token.src):
            _rewrite_noqa_comment(tokens, i, flake8_results)
        elif NOQA_FILE_RE.match(token.src) and not flake8_results:
            if i == 0 or tokens[i - 1].name == 'NEWLINE':
                del tokens[i:i + 2]
            else:
                _remove_comment(tokens, i)

    newsrc = tokenize_rt.tokens_to_src(tokens)
    if newsrc != contents_text:
        if show_diff or dry_run:
            diff = difflib.unified_diff(
                contents_text.splitlines(keepends=True),
                newsrc.splitlines(keepends=True),
                fromfile=filename,
                tofile=filename,
            )
            print(''.join(diff), end='')
        if not dry_run:
            print(f'Rewriting {filename}')
            with open(filename, 'wb') as f:
                f.write(newsrc.encode())
        return 1
    else:
        return 0
def fix_file(filename: str) -> int:
    with open(filename, 'rb') as f:
        contents_bytes = f.read()

    try:
        contents_text = contents_bytes.decode()
    except UnicodeDecodeError:
        print(f'{filename} is non-utf8 (not supported)')
        return 1

    tokens = tokenize_rt.src_to_tokens(contents_text)

    tokens_no_comments = _remove_comments(tokens)
    src_no_comments = tokenize_rt.tokens_to_src(tokens_no_comments)

    if src_no_comments == contents_text:
        return 0

    fd, path = tempfile.mkstemp(
        dir=os.path.dirname(filename),
        prefix=os.path.basename(filename),
        suffix='.py',
    )
    try:
        with open(fd, 'wb') as f:
            f.write(src_no_comments.encode())
        flake8_results = _run_flake8(path)
    finally:
        os.remove(path)

    if any('E999' in v for v in flake8_results.values()):
        print(f'{filename}: syntax error (skipping)')
        return 0

    for i, token in tokenize_rt.reversed_enumerate(tokens):
        if token.name != 'COMMENT':
            continue

        if NOQA_RE.search(token.src):
            _rewrite_noqa_comment(tokens, i, flake8_results)
        elif NOQA_FILE_RE.match(token.src) and not flake8_results:
            if i == 0 or tokens[i - 1].name == 'NEWLINE':
                del tokens[i:i + 2]
            else:
                _remove_comment(tokens, i)

    newsrc = tokenize_rt.tokens_to_src(tokens)
    if newsrc != contents_text:
        print(f'Rewriting {filename}')
        with open(filename, 'wb') as f:
            f.write(newsrc.encode())
        return 1
    else:
        return 0
def _replace_call(tokens, start, end, args, tmpl):
    arg_strs = [tokens_to_src(tokens[slice(*arg)]).strip() for arg in args]

    start_rest = args[0][1] + 1
    while (
            start_rest < end and
            tokens[start_rest].name in {'COMMENT', UNIMPORTANT_WS}
    ):
        start_rest += 1

    rest = tokens_to_src(tokens[start_rest:end - 1])
    src = tmpl.format(args=arg_strs, rest=rest)
    tokens[start:end] = [Token('CODE', src)]
def _fix_percent_format_dict(tokens, start, node):
    seen_keys = set()
    keys = {}
    for k in node.right.keys:
        # not a string key
        if not isinstance(k, ast.Str):
            return
        # duplicate key
        elif k.s in seen_keys:
            return
        # not an identifier
        elif not IDENT_RE.match(k.s):
            return
        # a keyword
        elif k.s in keyword.kwlist:
            return
        seen_keys.add(k.s)
        keys[_ast_to_offset(k)] = k

    # TODO: this is overly timid
    brace = start + 4
    if tokens_to_src(tokens[start + 1:brace + 1]) != ' % {':
        return

    victims = _victims(tokens, brace, node.right, gen=False)
    brace_end = victims.ends[-1]

    key_indices = []
    for i, token in enumerate(tokens[brace:brace_end], brace):
        k = keys.pop(token.offset, None)
        if k is None:
            continue
        # we found the key, but the string didn't match (implicit join?)
        elif ast.literal_eval(token.src) != k.s:
            return
        # the map uses some strange syntax that's not `'k': v`
        elif tokens_to_src(tokens[i + 1:i + 3]) != ': ':
            return
        else:
            key_indices.append((i, k.s))
    assert not keys, keys

    tokens[brace_end] = tokens[brace_end]._replace(src=')')
    for (key_index, s) in reversed(key_indices):
        tokens[key_index:key_index + 3] = [Token('CODE', '{}='.format(s))]
    newsrc = _percent_to_format(tokens[start].src)
    tokens[start] = tokens[start]._replace(src=newsrc)
    tokens[start + 1:brace + 1] = [Token('CODE', '.format'), Token('OP', '(')]
def _fix_six(contents_text):
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    visitor = FindSixUsage()
    visitor.visit(ast_obj)

    tokens = src_to_tokens(contents_text)
    for i, token in reversed_enumerate(tokens):
        if token.offset in visitor.simple_names:
            node = visitor.simple_names[token.offset]
            tokens[i] = Token('CODE', SIX_SIMPLE_ATTRS[node.id])
        elif token.offset in visitor.simple_attrs:
            node = visitor.simple_attrs[token.offset]
            if tokens[i + 1].src == '.' and tokens[i + 2].src == node.attr:
                tokens[i:i + 3] = [Token('CODE', SIX_SIMPLE_ATTRS[node.attr])]
        elif token.offset in visitor.remove_decorators:
            if tokens[i - 1].src == '@':
                end = i + 1
                while tokens[end].name != 'NEWLINE':
                    end += 1
                del tokens[i - 1:end + 1]

    return tokens_to_src(tokens)
def _upgrade(source: str) -> str:
    ast_obj = _ast_parse(source)
    visitor = _FindAssignment()
    visitor.visit(ast_obj)
    tokens = src_to_tokens(source)
    _mutate_found(tokens, visitor)
    return tokens_to_src(tokens)
def parse_call_args(
    tokens: list[Token],
    i: int,
) -> tuple[list[tuple[int, int]], int]:
    """
    Given the index of the opening bracket of a function call, step through
    and parse its arguments into a list of tuples of start, end indices.
    Return this list plus the position of the token after.
    """
    args = []
    stack = [i]
    i += 1
    arg_start = i

    while stack:
        token = tokens[i]

        if len(stack) == 1 and token.src == ",":
            args.append((arg_start, i))
            arg_start = i + 1
        elif token.src in BRACES:
            stack.append(i)
        elif token.src == BRACES[tokens[stack[-1]].src]:
            stack.pop()

            # if we're at the end, append that argument
            if not stack and tokens_to_src(tokens[arg_start:i]).strip():
                args.append((arg_start, i))

        i += 1

    return args, i
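# Usage sketch (not from the original module): tokenize a small call with
# tokenize_rt, find the opening parenthesis, and print each argument's source.
# `BRACES` is assumed to be the usual opening-to-closing bracket mapping that
# parse_call_args relies on; it is supplied here only for illustration.
from tokenize_rt import src_to_tokens, tokens_to_src

BRACES = {"(": ")", "[": "]", "{": "}"}

_tokens = src_to_tokens("f(a, [1, 2], key=value)\n")
_open_paren = next(i for i, tok in enumerate(_tokens) if tok.src == "(")
_args, _end = parse_call_args(_tokens, _open_paren)
print([tokens_to_src(_tokens[s:e]).strip() for s, e in _args])
# expected: ['a', '[1, 2]', 'key=value']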
def _has_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """
    Check if cell has trailing semicolon.

    Parameters
    ----------
    src
        Notebook cell source.

    Returns
    -------
    Tuple[str, bool]
        Cell source with any trailing semicolon removed, and whether the cell
        had a trailing semicolon.
    """
    tokens = tokenize_rt.src_to_tokens(src)
    trailing_semicolon = False
    for idx, token in tokenize_rt.reversed_enumerate(tokens):
        if not token.src.strip(" \n") or token.name == "COMMENT":
            continue
        if token.name == "OP" and token.src == ";":
            tokens[idx] = token._replace(src="")
            trailing_semicolon = True
        break

    if not trailing_semicolon:
        return src, False
    return tokenize_rt.tokens_to_src(tokens), True
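# Usage sketch (not part of the snippet above): strip a trailing semicolon
# from one notebook cell's source and report whether one was present.
_cleaned, _had_semicolon = _has_trailing_semicolon("plt.plot(x, y);\n")
# _cleaned == "plt.plot(x, y)\n" and _had_semicolon is True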
def replace_call(
        tokens: List[Token],
        start: int,
        end: int,
        args: List[Tuple[int, int]],
        tmpl: str,
        *,
        parens: Sequence[int] = (),
) -> None:
    arg_strs = [arg_str(tokens, *arg) for arg in args]
    for paren in parens:
        arg_strs[paren] = f'({arg_strs[paren]})'

    start_rest = args[0][1] + 1
    while (
            start_rest < end and
            tokens[start_rest].name in {'COMMENT', UNIMPORTANT_WS}
    ):
        start_rest += 1

    # Remove trailing comma
    end_rest = end - 1
    while (
            tokens[end_rest - 1].name == 'OP' and
            tokens[end_rest - 1].src == ','
    ):
        end_rest -= 1

    rest = tokens_to_src(tokens[start_rest:end_rest])
    src = tmpl.format(args=arg_strs, rest=rest)
    tokens[start:end] = [Token('CODE', src)]
def parse_call_args(
        tokens: List[Token],
        i: int,
) -> Tuple[List[Tuple[int, int]], int]:
    args = []
    stack = [i]
    i += 1
    arg_start = i

    while stack:
        token = tokens[i]

        if len(stack) == 1 and token.src == ',':
            args.append((arg_start, i))
            arg_start = i + 1
        elif token.src in BRACES:
            stack.append(i)
        elif token.src == BRACES[tokens[stack[-1]].src]:
            stack.pop()

            # if we're at the end, append that argument
            if not stack and tokens_to_src(tokens[arg_start:i]).strip():
                args.append((arg_start, i))

        i += 1

    return args, i
def decode(b, errors='strict'):
    import tokenize_rt  # pip install future-fstrings[rewrite]

    u, length = utf_8.decode(b, errors)
    tokens = tokenize_rt.src_to_tokens(u)

    to_replace = []
    start = end = seen_f = None

    for i, token in enumerate(tokens):
        if start is None:
            if token.name == 'STRING':
                start, end = i, i + 1
                seen_f = _is_f(token)
        elif token.name == 'STRING':
            end = i + 1
            seen_f |= _is_f(token)
        elif token.name not in tokenize_rt.NON_CODING_TOKENS:
            if seen_f:
                to_replace.append((start, end))
            start = end = seen_f = None

    for start, end in reversed(to_replace):
        try:
            tokens[start:end] = _make_fstring(tokens[start:end])
        except TokenSyntaxError as e:
            msg = str(e.e)
            line = u.splitlines()[e.token.line - 1]
            bts = line.encode('UTF-8')[:e.token.utf8_byte_offset]
            indent = len(bts.decode('UTF-8'))
            raise SyntaxError(msg + '\n\n' + line + '\n' + ' ' * indent + '^')

    return tokenize_rt.tokens_to_src(tokens), length
def _fix_percent_format(contents_text):
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    visitor = FindPercentFormats()
    visitor.visit(ast_obj)

    if not visitor.found:
        return contents_text

    tokens = src_to_tokens(contents_text)

    for i, token in reversed_enumerate(tokens):
        node = visitor.found.get(token.offset)
        if node is None:
            continue

        # no .format() equivalent for bytestrings in py3
        # note that this code is only necessary when running in python2
        if _is_bytestring(tokens[i].src):  # pragma: no cover (py2-only)
            continue

        if isinstance(node.right, ast.Tuple):
            _fix_percent_format_tuple(tokens, i, node)
        elif isinstance(node.right, ast.Dict):
            _fix_percent_format_dict(tokens, i, node)

    return tokens_to_src(tokens)
def _fix_plugins(contents_text: str, settings: Settings) -> str:
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    callbacks = visit(FUNCS, ast_obj, settings)

    if not callbacks:
        return contents_text

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:  # pragma: no cover (bpo-2180)
        return contents_text

    _fixup_dedent_tokens(tokens)

    for i, token in reversed_enumerate(tokens):
        if not token.src:
            continue
        # though this is a defaultdict, by using `.get()` this function's
        # self time is almost 50% faster
        for callback in callbacks.get(token.offset, ()):
            callback(i, tokens)

    return tokens_to_src(tokens)
def _fix_src(contents_text: str, min_version: Tuple[int, ...]) -> str:
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    callbacks = visit(FUNCS, ast_obj, min_version)

    tokens = src_to_tokens(contents_text)
    for i, token in _changing_list(tokens):
        # DEDENT is a zero length token
        if not token.src:
            continue

        # though this is a defaultdict, by using `.get()` this function's
        # self time is almost 50% faster
        for callback in callbacks.get(token.offset, ()):
            callback(i, tokens)

        if token.src in START_BRACES:
            fix_brace(
                tokens,
                find_simple(i, tokens),
                add_comma=False,
                remove_comma=False,
            )

    return tokens_to_src(tokens)
def remove_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """Remove trailing semicolon from Jupyter notebook cell.

    For example,

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    would become

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    Mirrors the logic in `quiet` from `IPython.core.displayhook`, but uses
    ``tokenize_rt`` so that round-tripping works fine.
    """
    from tokenize_rt import (
        src_to_tokens,
        tokens_to_src,
        reversed_enumerate,
    )

    tokens = src_to_tokens(src)
    trailing_semicolon = False
    for idx, token in reversed_enumerate(tokens):
        if token.name in TOKENS_TO_IGNORE:
            continue
        if token.name == "OP" and token.src == ";":
            del tokens[idx]
            trailing_semicolon = True
        break

    if not trailing_semicolon:
        return src, False
    return tokens_to_src(tokens), True
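# Usage sketch under an assumption: `TOKENS_TO_IGNORE` is not defined in the
# snippet above, so a plausible value covering non-coding token names is
# supplied here purely for illustration.
TOKENS_TO_IGNORE = frozenset(("ENDMARKER", "NL", "NEWLINE", "COMMENT"))

_src, _had_semicolon = remove_trailing_semicolon("ax.plot(x_data, y_data);\n")
# _src == "ax.plot(x_data, y_data)\n" and _had_semicolon is True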
def _fix_tokens(contents_text: str, min_version: Version) -> str:
    remove_u = (
        min_version >= (3,) or
        _imports_future(contents_text, 'unicode_literals')
    )

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:
        return contents_text

    for i, token in reversed_enumerate(tokens):
        if token.name == 'NUMBER':
            tokens[i] = token._replace(src=_fix_long(_fix_octal(token.src)))
        elif token.name == 'STRING':
            tokens[i] = _fix_ur_literals(tokens[i])
            if remove_u:
                tokens[i] = _remove_u_prefix(tokens[i])
            tokens[i] = _fix_escape_sequences(tokens[i])
        elif token.src == '(':
            _fix_extraneous_parens(tokens, i)
        elif token.src == 'format' and i > 0 and tokens[i - 1].src == '.':
            _fix_format_literal(tokens, i - 2)
        elif token.src == 'encode' and i > 0 and tokens[i - 1].src == '.':
            _fix_encode_to_binary(tokens, i)
        elif (
                min_version >= (3,) and
                token.utf8_byte_offset == 0 and
                token.line < 3 and
                token.name == 'COMMENT' and
                tokenize.cookie_re.match(token.src)
        ):
            del tokens[i]
            assert tokens[i].name == 'NL', tokens[i].name
            del tokens[i]
        elif token.src == 'from' and token.utf8_byte_offset == 0:
            _fix_import_removals(tokens, i, min_version)

    return tokens_to_src(tokens).lstrip()
def _fix_py2_compatible(contents_text):
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    visitor = Py2CompatibleVisitor()
    visitor.visit(ast_obj)

    if not any((
            visitor.dicts,
            visitor.sets,
            visitor.set_empty_literals,
            visitor.is_literal,
    )):
        return contents_text

    tokens = src_to_tokens(contents_text)
    for i, token in reversed_enumerate(tokens):
        if token.offset in visitor.dicts:
            _process_dict_comp(tokens, i, visitor.dicts[token.offset])
        elif token.offset in visitor.set_empty_literals:
            _process_set_empty_literal(tokens, i)
        elif token.offset in visitor.sets:
            _process_set_literal(tokens, i, visitor.sets[token.offset])
        elif token.offset in visitor.is_literal:
            _process_is_literal(tokens, i, visitor.is_literal[token.offset])

    return tokens_to_src(tokens)
def _fix_percent_format_dict(
        i: int,
        tokens: List[Token],
        *,
        node_right: ast.Dict,
) -> None:
    # TODO: handle \N escape sequences
    if r'\N' in tokens[i].src:
        return

    seen_keys: Set[str] = set()
    keys = {}

    for k in node_right.keys:
        # not a string key
        if not isinstance(k, ast.Str):
            return
        # duplicate key
        elif k.s in seen_keys:
            return
        # not an identifier
        elif not k.s.isidentifier():
            return
        # a keyword
        elif k.s in KEYWORDS:
            return
        seen_keys.add(k.s)
        keys[ast_to_offset(k)] = k

    # TODO: this is overly timid
    brace = i + 4
    if tokens_to_src(tokens[i + 1:brace + 1]) != ' % {':
        return

    fmt_victims = victims(tokens, brace, node_right, gen=False)
    brace_end = fmt_victims.ends[-1]

    key_indices = []
    for j, token in enumerate(tokens[brace:brace_end], brace):
        key = keys.pop(token.offset, None)
        if key is None:
            continue
        # we found the key, but the string didn't match (implicit join?)
        elif ast.literal_eval(token.src) != key.s:
            return
        # the map uses some strange syntax that's not `'key': value`
        elif tokens[j + 1].src != ':' or tokens[j + 2].src != ' ':
            return
        else:
            key_indices.append((j, key.s))
    assert not keys, keys

    tokens[brace_end] = tokens[brace_end]._replace(src=')')
    for key_index, s in reversed(key_indices):
        tokens[key_index:key_index + 3] = [Token('CODE', f'{s}=')]
    newsrc = _percent_to_format(tokens[i].src)
    tokens[i] = tokens[i]._replace(src=newsrc)
    tokens[i + 1:brace + 1] = [Token('CODE', '.format'), Token('OP', '(')]
def _fix_calls(contents_text: str) -> str:
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    visitor = Visitor()
    visitor.visit(ast_obj)

    if not visitor.calls:
        return contents_text

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:  # pragma: no cover (bpo-2180)
        return contents_text

    for i, token in reversed_enumerate(tokens):
        if token.offset in visitor.calls:
            visitor.calls.discard(token.offset)

            # search forward for the opening brace
            while tokens[i].src != '(':
                i += 1

            call_start = i
            i += 1
            brace_depth = 1
            start = -1
            end = -1

            while brace_depth:
                if tokens[i].src in {'(', '{', '['}:
                    if brace_depth == 1:
                        start = i
                    brace_depth += 1
                elif tokens[i].src in {')', '}', ']'}:
                    brace_depth -= 1
                    if brace_depth == 1:
                        end = i
                i += 1

            assert start != -1
            assert end != -1
            call_end = i - 1

            # dedent everything inside the brackets
            for i in range(call_start, call_end):
                if (
                        tokens[i - 1].name == 'NL' and
                        tokens[i].name == UNIMPORTANT_WS
                ):
                    tokens[i] = tokens[i]._replace(src=tokens[i].src[4:])

            del tokens[end + 1:call_end]
            del tokens[call_start + 1:start]

    return tokens_to_src(tokens)
def fix_file(filename):
    with open(filename, 'rb') as f:
        contents_bytes = f.read()

    try:
        contents_text = contents_bytes.decode('UTF-8')
    except UnicodeDecodeError:
        print('{} is non-utf8 (not supported)'.format(filename))
        return 1

    tokens = tokenize_rt.src_to_tokens(contents_text)

    tokens_no_comments = _remove_comments(tokens)
    src_no_comments = tokenize_rt.tokens_to_src(tokens_no_comments)

    with tempfile.NamedTemporaryFile(
        dir=os.path.dirname(filename),
        prefix=os.path.basename(filename),
        suffix='.py',
    ) as tmpfile:
        tmpfile.write(src_no_comments.encode('UTF-8'))
        tmpfile.flush()
        flake8_results = _run_flake8(tmpfile.name)

    for i, token in reversed(tuple(enumerate(tokens))):
        if token.name != 'COMMENT':
            continue

        if NOQA_RE.search(token.src):
            _rewrite_noqa_comment(tokens, i, flake8_results)
        elif NOQA_FILE_RE.match(token.src) and not flake8_results:
            if i == 0 or tokens[i - 1].name == 'NEWLINE':
                del tokens[i:i + 2]
            else:
                _remove_comment(tokens, i)

    newsrc = tokenize_rt.tokens_to_src(tokens)
    if newsrc != contents_text:
        print('Rewriting {}'.format(filename))
        with open(filename, 'wb') as f:
            f.write(newsrc.encode('UTF-8'))
        return 1
    else:
        return 0
def decode(b, errors="strict"):
    non_coding_tokens = frozenset(
        ("COMMENT", tokenize_rt.ESCAPED_NL, "NL", tokenize_rt.UNIMPORTANT_WS))

    u, length = utf_8.decode(b, errors)
    tokens = tokenize_rt.src_to_tokens(u)

    to_replace = []
    started = -1
    end = -1

    # iterate one index past the end so a trailing f-string group still flushes
    for i in range(0, 1 + len(tokens)):
        if i < len(tokens):
            token = tokens[i]
        else:
            token = None

        if token:
            if fstr(token):
                if started < 0:
                    started = i
                continue
            end = i

        if started >= 0:
            if peek_is_fstr(tokens, i + 1):
                continue
            if peek_is_str(tokens, i + 1):
                continue
            if token is None:
                pass
            elif token.name in non_coding_tokens or peek_is_str(tokens, i):
                # multiline f-string + str
                continue
            to_replace.append((started, end))
            started = -1

    for start, end in reversed(to_replace):
        if end - start > 1:
            # move ending line away from format of multiline f-strings
            if tokens[end - 1].name in non_coding_tokens:
                end -= 1
        try:
            tokens[start:end] = _make_fstring(tokens[start:end])
        except TokenSyntaxError as e:
            msg = str(e.e)
            line = u.splitlines()[e.token.line - 1]
            bts = line.encode("UTF-8")[:e.token.utf8_byte_offset]
            indent = len(bts.decode("UTF-8"))
            raise SyntaxError(msg + "\n\n" + line + "\n" + " " * indent + "^")

    return tokenize_rt.tokens_to_src(tokens), length
def decode(b, errors='strict'):
    u, length = utf_8.decode(b, errors)

    tokens = tokenize_rt.src_to_tokens(u)
    new_tokens = []
    for token in tokens:
        if token.name == 'NUMBER':
            new_tokens.extend(
                tokenize_rt.src_to_tokens("blurse({})".format(token.src)))
        else:
            new_tokens.append(token)

    return tokenize_rt.tokens_to_src(new_tokens), length
def _fix_octal_literals(contents_text):
    def _fix_octal(s):
        if not s.startswith('0') or not s.isdigit() or s == len(s) * '0':
            return s
        else:  # pragma: no cover (py2 only)
            return '0o' + s[1:]

    tokens = src_to_tokens(contents_text)
    for i, token in enumerate(tokens):
        if token.name == 'NUMBER':
            tokens[i] = token._replace(src=_fix_octal(token.src))
    return tokens_to_src(tokens)
def _fix_unicode_literals(contents_text, py3_plus):
    if not py3_plus and not _imports_unicode_literals(contents_text):
        return contents_text

    tokens = src_to_tokens(contents_text)
    for i, token in enumerate(tokens):
        if token.name != 'STRING':
            continue

        match = STRING_PREFIXES_RE.match(token.src)
        prefix = match.group(1)
        rest = match.group(2)
        new_prefix = prefix.replace('u', '').replace('U', '')
        tokens[i] = Token('STRING', new_prefix + rest)
    return tokens_to_src(tokens)
def _fix_open_mode(i: int, tokens: List[Token]) -> None:
    j = find_open_paren(tokens, i)
    func_args, end = parse_call_args(tokens, j)
    mode = tokens_to_src(tokens[slice(*func_args[1])])
    mode_stripped = mode.strip().strip('"\'')
    if mode_stripped in U_MODE_REMOVE:
        del tokens[func_args[0][1]:func_args[1][1]]
    elif mode_stripped in U_MODE_REPLACE_R:
        new_mode = mode.replace('U', 'r')
        tokens[slice(*func_args[1])] = [Token('SRC', new_mode)]
    elif mode_stripped in U_MODE_REMOVE_U:
        new_mode = mode.replace('U', '')
        tokens[slice(*func_args[1])] = [Token('SRC', new_mode)]
    else:
        raise AssertionError(f'unreachable: {mode!r}')
def decode(b: bytes, errors: str = 'strict') -> Tuple[str, int]:
    u, length = utf_8.decode(b, errors)

    # replace encoding cookie so there isn't a recursion problem
    lines = u.splitlines(True)
    for idx in (0, 1):
        if idx >= len(lines):
            break
        lines[idx] = tokenize.cookie_re.sub(_new_coding_cookie, lines[idx])
    u = ''.join(lines)

    visitor = Visitor()
    visitor.visit(_ast_parse(u))

    tokens = tokenize_rt.src_to_tokens(u)
    for i, token in tokenize_rt.reversed_enumerate(tokens):
        if token.offset in visitor.offsets:
            # look forward for a `:`, `,`, `=`, ')'
            depth = 0
            j = i + 1
            while depth or tokens[j].src not in {':', ',', '=', ')', '\n'}:
                if tokens[j].src in {'(', '{', '['}:
                    depth += 1
                elif tokens[j].src in {')', '}', ']'}:
                    depth -= 1
                j += 1
            j -= 1

            # look backward to delete whitespace / comments / etc.
            while tokens[j].name in tokenize_rt.NON_CODING_TOKENS:
                j -= 1

            quoted = repr(tokenize_rt.tokens_to_src(tokens[i:j + 1]))
            tokens[i:j + 1] = [tokenize_rt.Token('STRING', quoted)]

    return tokenize_rt.tokens_to_src(tokens), length
def _fix_percent_format_tuple(tokens, start, node):
    # TODO: this is overly timid
    paren = start + 4
    if tokens_to_src(tokens[start + 1:paren + 1]) != ' % (':
        return

    victims = _victims(tokens, paren, node.right, gen=False)
    victims.ends.pop()

    for index in reversed(victims.starts + victims.ends):
        _remove_brace(tokens, index)

    newsrc = _percent_to_format(tokens[start].src)
    tokens[start] = tokens[start]._replace(src=newsrc)
    tokens[start + 1:paren] = [Token('Format', '.format'), Token('OP', '(')]