def _remove_u_prefix(token: Token) -> Token:
    """Return *token* with every ``u``/``U`` dropped from its string prefix.

    The token source is split by ``parse_string_literal`` into a prefix and
    the remainder.  When the prefix contains no ``u`` (compared
    case-insensitively) the token is handed back untouched; otherwise a
    copy is returned whose ``src`` is the stripped prefix followed by the
    unmodified remainder.
    """
    prefix, rest = parse_string_literal(token.src)
    if 'u' in prefix.lower():
        # Remove both spellings; any other prefix characters are preserved.
        stripped = prefix.replace('u', '').replace('U', '')
        return token._replace(src=stripped + rest)
    return token
def _fix_escape_sequences(token: Token) -> Token:
    """Rewrite a string token so it has no unrecognised backslash escapes.

    The token source is split by ``parse_string_literal`` into a prefix and
    the remainder.  The token is returned unchanged when the prefix
    contains ``r`` (case-insensitively), when the remainder holds no
    backslash, or when every ``ESCAPE_RE`` match is a recognised escape.

    Otherwise one of two rewrites is applied:

    * if recognised escapes are also present, or the prefix contains ``u``,
      each unrecognised escape gets an extra leading backslash;
    * if only unrecognised escapes are present, ``r`` is appended to the
      prefix and the remainder is left as written.
    """
    prefix, rest = parse_string_literal(token.src)
    lowered_prefix = prefix.lower()

    # Nothing to do for raw strings or strings without any backslash.
    if 'r' in lowered_prefix or '\\' not in rest:
        return token

    is_bytestring = 'b' in lowered_prefix

    def _recognised(match: Match[str]) -> bool:
        # The character at index 1 of the match decides validity
        # (presumably the one right after the backslash -- depends on
        # how ESCAPE_RE is defined).
        char = match.group()[1]
        if char in ESCAPE_STARTS:
            return True
        # The u/U/N forms below are only accepted outside bytestrings.
        if is_bytestring:
            return False
        if char in 'uU':
            return True
        # ``N`` counts only when NAMED_ESCAPE_NAME matches right after it.
        return (
            char == 'N' and
            bool(NAMED_ESCAPE_NAME.match(rest, match.end()))
        )

    validity = [_recognised(m) for m in ESCAPE_RE.finditer(rest)]
    has_valid_escapes = any(validity)
    has_invalid_escapes = not all(validity)

    if not has_invalid_escapes:
        return token

    if has_valid_escapes or 'u' in lowered_prefix:
        def _escape_unrecognised(match: Match[str]) -> str:
            found = match.group()
            if _recognised(match):
                return found
            return fr'\{found}'

        fixed = ESCAPE_RE.sub(_escape_unrecognised, rest)
        return token._replace(src=prefix + fixed)

    # Only unrecognised escapes and no ``u`` in the prefix: add ``r``.
    return token._replace(src=prefix + 'r' + rest)
def _fix_ur_literals(token: Token) -> Token:
    """Turn a ``ur``-prefixed string token into one without the ``r``.

    The token source is split by ``parse_string_literal`` into a prefix and
    the remainder.  Tokens whose prefix is not ``ur`` (compared
    case-insensitively) are returned unchanged.  For the rest, every
    ``ESCAPE_RE`` match in the remainder gets an extra leading backslash,
    except matches whose character at index 1 is ``u``/``U``, which are
    kept verbatim.  ``r``/``R`` is then removed from the prefix and a copy
    of the token with the new source is returned.
    """
    prefix, rest = parse_string_literal(token.src)
    if prefix.lower() != 'ur':
        return token

    def _double_unless_u(match: Match[str]) -> str:
        found = match.group()
        if found[1].lower() == 'u':
            return found
        return '\\' + found

    new_rest = ESCAPE_RE.sub(_double_unless_u, rest)
    new_prefix = prefix.replace('r', '').replace('R', '')
    return token._replace(src=new_prefix + new_rest)