def test_re_escape_non_ascii_bytes(self):
    # UTF-8 encoded input: the expected value has a backslash in front of
    # every byte of each encoded U+2620 character.
    skull = u'\u2620'.encode('utf-8')
    encoded = u'y\u2620y\u2620y'.encode('utf-8')
    escaped = re.escape(encoded)
    expected = b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y'
    self.assertEqual(escaped, expected)
    self.assertMatch(escaped, encoded)

    # Escaping the encoded character alone must locate both occurrences.
    hits = re.findall(re.escape(skull), encoded)
    self.assertEqual(len(hits), 2)
def test_re_escape_non_ascii(self):
    # The expected value has a backslash in front of each U+2620 character
    # and leaves the ASCII letters alone.
    text = u'xxx\u2620\u2620\u2620xxx'
    escaped = re.escape(text)
    self.assertEqual(escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(escaped, text)

    # Embed the escaped character in a larger pattern; the trailing arguments
    # are handed to the assertMatch helper exactly as before.
    pattern = u'.%s+.' % re.escape(u'\u2620')
    self.assertMatch(pattern, text, u'x\u2620\u2620\u2620x', (2, 7), re.search)
def test_re_escape_byte(self):
    # NOTE(review): '' and b'' literals are mixed freely, so this presumably
    # targets Python 2 where str is a byte string - confirm before porting.
    untouched = (string.ascii_letters + string.digits).encode('ascii')
    every_byte = ''.join(chr(code) for code in range(256))
    for byte in every_byte:
        if byte in untouched:
            # Letters and digits come back unchanged.
            expected = byte
        elif byte == b'\x00':
            # NUL is spelled as an octal escape.
            expected = b'\\000'
        else:
            expected = b'\\' + byte
        self.assertEqual(re.escape(byte), expected)
        self.assertMatch(re.escape(byte), byte)
    # The whole range escaped in one go must match itself as well.
    self.assertMatch(re.escape(every_byte), every_byte)
def test_re_escape(self):
    # Walk the first 256 code points and check the escaped form of each one.
    untouched = string.ascii_letters + string.digits
    every_char = u''.join(unichr(code) for code in range(256))
    for char in every_char:
        if char in untouched:
            # Letters and digits come back unchanged.
            expected = char
        elif char == u'\x00':
            # NUL is spelled as an octal escape.
            expected = u'\\000'
        else:
            expected = u'\\' + char
        self.assertEqual(re.escape(char), expected)
        self.assertMatch(re.escape(char), char)
    # The whole range escaped in one go must match itself as well.
    self.assertMatch(re.escape(every_char), every_char)
def test_basic_re_sub(self):
    # Plain string replacement with an inline flag.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')

    # Callable replacement, without and with a positional count of 3.
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is used verbatim; a template string has its
    # escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Group references by name and by number.  These templates are raw
    # strings: '\g' is not a Python escape, so the non-raw spelling only
    # worked by accident and triggers invalid-escape warnings on Python 3.
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    # Escapes inside the replacement template.
    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    # Here the control characters are already literal in the replacement.
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    # A pattern that matches the empty string at the start of the input.
    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
def test_bug_612074(self):
    # Named after bug 612074: a character class built around an escaped
    # non-ASCII character has to compile.
    escaped = re.escape(u"\u2039")
    char_class = u"[" + escaped + u"]"
    compiled = re.compile(char_class)
    self.assertEqual(compiled and 1, 1)
def execute(mode, code, input_str):
    """Run one instruction and either print its result or chain into the next.

    ``mode`` is a one-character instruction selector, ``code`` is the
    instruction body and ``input_str`` its input.  The separator characters
    used below (semicolon, ampersand, backtick, exclamation mark, tilde,
    percent) are matched behind a negative look-behind that rejects an
    occurrence preceded by a non-backslash character plus a backslash.

    Modes:
        l   Look ``input_str`` up in the table described by ``code``; the
            "?" entry is the fallback (KeyError if it is missing).
        f   ``code % ast.literal_eval(input_str)``.
        F   Like ``f``; ``input_str`` then becomes the summed length of the
            ``str()`` of each tuple element, or of the single literal.
        g   Print every string yielded by ``exrex.generate(code)`` and return.
        h   Like ``g``, with the regex-escaped input formatted into ``code``.
        p   Repeat each ``~...~`` span n times for an int input; otherwise
            repeat ``~...~`` spans by element 0 and ``%...%`` spans by
            element 1 of the literal.
        P   Replace each "character followed by ~" with that character
            repeated n times.
        e   Translate ``input_str`` character by character through the table
            (KeyError for a character without an entry).
        o   Print the text before the first backtick; the rest continues.
        s   Apply pattern/replacement pairs to ``input_str``.
        d   Delete every match of each pattern from ``input_str``.
        S   Like ``s`` but print the substituted text and leave ``input_str``
            untouched.
        i   ``code + input_str``.
        I   ``code``, a newline, then ``input_str``.
        Any other mode yields ``code`` unchanged.

    A result that starts with a backtick is executed as the next
    instruction: its second character is the mode, the text up to the first
    "!" separator is the code and the remainder is the input.  Without a "!"
    the input comes from ``get_input(input_str)``.  Any other result is
    printed after backslash escapes are expanded.

    Returns:
        None.  All output goes to standard output.
    """
    result = ""
    if mode == "l":
        table = _parse_table(code)
        result = table[input_str] if input_str in table else table["?"]
    elif mode == "f":
        result = code % ast.literal_eval(input_str)
    elif mode == "F":
        literal = ast.literal_eval(input_str)
        result = code % literal
        if isinstance(literal, tuple):
            input_str = str(sum(len(str(item)) for item in literal))
        else:
            input_str = str(len(str(literal)))
    elif mode == "g":
        for generated in exrex.generate(code):
            print(_unescape(generated))
        return  # Generate is always terminal
    elif mode == "h":
        if isinstance(input_str, str):
            input_str = pcre.escape(input_str)
        for generated in exrex.generate(code % input_str):
            print(_unescape(generated))
        return
    elif mode == "p":
        literal = ast.literal_eval(input_str)
        if isinstance(literal, int):
            result = pcre.sub(r"(?<![^\\]\\)~(.+?)(?<![^\\]\\)~",
                              r"\1" * literal, code, flags=pcre.DOTALL)
        else:
            # NOTE(review): this inner pattern has no escape look-behind,
            # unlike every other separator pattern here - confirm intent.
            tilde_expanded = pcre.sub(r"~(.+?)~", r"\1" * literal[0], code,
                                      flags=pcre.DOTALL)
            result = pcre.sub(r"(?<![^\\]\\)%(.+?)(?<![^\\]\\)%",
                              r"\1" * literal[1], tilde_expanded,
                              flags=pcre.DOTALL)
    elif mode == "P":
        result = pcre.sub(r"(.)(?<![^\\]\\)~",
                          r"\1" * ast.literal_eval(input_str), code,
                          flags=pcre.DOTALL)
    elif mode == "e":
        table = _parse_table(code)
        # Fix: the loop used to iterate over an unbound name instead of the
        # characters of the input.
        result = "".join(table[char] for char in input_str)
    elif mode == "o":
        pieces = pcre.split(r"(?<![^\\]\\)`", code)
        print(_unescape(pieces[0]))
        # Fix: with nothing after the printed piece the result used to be a
        # lone backtick, which crashed when its mode character was read.
        result = _continuation(pieces, "")
    elif mode == "s":
        pieces = pcre.split(r"(?<![^\\]\\)`", code)
        subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])
        # subs alternates pattern, replacement; an unpaired trailing pattern
        # raises IndexError, as it always did.
        for index in range(0, len(subs), 2):
            input_str = pcre.sub(subs[index], subs[index + 1], input_str)
        result = _continuation(pieces, input_str)
    elif mode == "d":
        pieces = pcre.split(r"(?<![^\\]\\)`", code)
        subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])
        for pattern in subs:
            input_str = pcre.sub(pattern, "", input_str)
        result = _continuation(pieces, input_str)
    elif mode == "S":
        pieces = pcre.split(r"(?<![^\\]\\)`", code)
        subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])
        output = input_str
        for index in range(0, len(subs), 2):
            output = pcre.sub(subs[index], subs[index + 1], output)
        result = _continuation(pieces, "")
        print(_unescape(output))
    elif mode == "i":
        result = code + input_str
    elif mode == "I":
        result = code + "\n" + input_str
    else:
        result = code

    if result and result[0] == "`":
        input_pieces = pcre.split(r"(?<![^\\]\\)!", result)
        if len(input_pieces) >= 2:
            # Drop the leading backtick and mode character from the code.
            execute(result[1], input_pieces[0][2:], "!".join(input_pieces[1:]))
        else:
            execute(result[1], result[2:], get_input(input_str))
    else:
        print(_unescape(result))


def _parse_table(code):
    """Build the key -> value mapping used by the "l" and "e" modes.

    Rows are separated by ";" and cells by "&"; the last cell of a row is the
    value assigned to every cell before it.
    """
    table = {}
    for row_text in pcre.split(r"(?<![^\\]\\);", code):
        cells = pcre.split(r"(?<![^\\]\\)&", row_text)
        table.update(dict.fromkeys(cells[:-1], cells[-1]))
    return table


def _continuation(pieces, fallback):
    """Re-join the pieces after the first as the next instruction, else ``fallback``."""
    if len(pieces) > 1:
        return "`" + "`".join(pieces[1:])
    return fallback


def _unescape(text):
    """Expand backslash escapes in ``text`` without mangling non-ASCII characters."""
    # The unicode-escape codec reads bytes as Latin-1, so encoding to UTF-8
    # first turned every non-ASCII character into mojibake.  Latin-1 bytes
    # round-trip unchanged and backslashreplace spells anything above U+00FF
    # as an escape the codec decodes back.  (A backslash placed directly in
    # front of such a character is still not preserved faithfully.)
    return text.encode("latin-1", "backslashreplace").decode("unicode-escape")
def execute(mode, code, input_str):
    """Run one instruction and either print its result or chain into the next.

    ``mode`` is a one-character instruction selector, ``code`` is the
    instruction body and ``input_str`` its input.  Separator characters are
    matched behind a negative look-behind that rejects an occurrence
    preceded by a non-backslash character plus a backslash.

    NOTE(review): ``handle_table``, ``handle_pieces``, ``handle_subs``,
    ``unescape`` and ``get_input`` are defined outside this block; what is
    said about them below is inferred from how they are called - confirm
    against their definitions.

    Modes:
        l   Look ``input_str`` up in the table built by ``handle_table``; the
            "?" entry is the fallback.
        f   ``code % ast.literal_eval(input_str)``.
        F   Like ``f``; ``input_str`` then becomes the summed length of the
            ``str()`` of each tuple element, or of the single literal.
        g   Print every string yielded by ``exrex.generate(code)`` and return.
        h   Like ``g``, with the regex-escaped input formatted into ``code``.
        p   Repeat each ``~...~`` span n times for an int input; otherwise
            repeat ``~...~`` spans by element 0 and ``%...%`` spans by
            element 1 of the literal.
        P   Replace each "character followed by ~" with that character
            repeated n times.
        e   Translate ``input_str`` character by character through the table.
        o   Print the text before the first backtick; the remaining pieces
            go to ``handle_pieces``.
        s   Pass the input and the "&"-separated items to ``handle_subs``.
        d   Delete every match of each pattern from ``input_str``.
        S   Like ``s`` but print the substituted text and leave ``input_str``
            untouched.
        i   ``code + input_str``.
        I   ``code``, a newline, then ``input_str``.
        Any other mode yields ``code`` unchanged.

    A result that starts with a backtick is executed as the next
    instruction: its second character is the mode, the text up to the first
    "!" separator is the code and the remainder is the input.  Without a "!"
    the input comes from ``get_input(input_str)``.  Any other result is
    printed via ``unescape``.

    Returns:
        None.  All output goes to standard output.
    """
    result = ""
    if mode == "l":
        rows = (pcre.split(r"(?<![^\\]\\)&", row)
                for row in pcre.split(r"(?<![^\\]\\);", code))
        table = handle_table(rows)
        result = table[input_str] if input_str in table else table["?"]
    elif mode == "f":
        result = code % ast.literal_eval(input_str)
    elif mode == "F":
        literal = ast.literal_eval(input_str)
        result = code % literal
        if isinstance(literal, tuple):
            input_str = str(sum(len(str(item)) for item in literal))
        else:
            input_str = str(len(str(literal)))
    elif mode == "g":
        for generated in exrex.generate(code):
            print(unescape(generated))
        return  # Generate is always terminal
    elif mode == "h":
        if isinstance(input_str, str):
            input_str = pcre.escape(input_str)
        for generated in exrex.generate(code % input_str):
            print(unescape(generated))
        return
    elif mode == "p":
        literal = ast.literal_eval(input_str)
        if isinstance(literal, int):
            result = pcre.sub(r"(?<![^\\]\\)~(.+?)(?<![^\\]\\)~",
                              r"\1" * literal, code, flags=pcre.DOTALL)
        else:
            # NOTE(review): this inner pattern has no escape look-behind,
            # unlike every other separator pattern here - confirm intent.
            tilde_expanded = pcre.sub(r"~(.+?)~", r"\1" * literal[0], code,
                                      flags=pcre.DOTALL)
            result = pcre.sub(r"(?<![^\\]\\)%(.+?)(?<![^\\]\\)%",
                              r"\1" * literal[1], tilde_expanded,
                              flags=pcre.DOTALL)
    elif mode == "P":
        result = pcre.sub(r"(.)(?<![^\\]\\)~",
                          r"\1" * ast.literal_eval(input_str), code,
                          flags=pcre.DOTALL)
    elif mode == "e":
        rows = (pcre.split(r"(?<![^\\]\\)&", row)
                for row in pcre.split(r"(?<![^\\]\\);", code))
        table = handle_table(rows)
        # Fix: the loop used to iterate over the undefined name ``i`` instead
        # of the characters of the input.
        result = "".join(table[char] for char in input_str)
    elif mode == "o":
        pieces = pcre.split(r"(?<![^\\]\\)`", code)
        print(unescape(pieces[0]))
        result = handle_pieces(pieces[1:], "")
    elif mode == "s":
        pieces = pcre.split(r"(?<![^\\]\\)`", code)
        subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])
        input_str = handle_subs(input_str, subs)
        result = handle_pieces(pieces[1:], input_str)
    elif mode == "d":
        pieces = pcre.split(r"(?<![^\\]\\)`", code)
        subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])
        for pattern in subs:
            input_str = pcre.sub(pattern, "", input_str)
        result = handle_pieces(pieces[1:], input_str)
    elif mode == "S":
        pieces = pcre.split(r"(?<![^\\]\\)`", code)
        subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])
        output = unescape(handle_subs(input_str, subs))
        result = handle_pieces(pieces[1:], "")
        print(output)
    elif mode == "i":
        result = code + input_str
    elif mode == "I":
        result = code + "\n" + input_str
    else:
        result = code

    if len(result) > 0 and result[0] == "`":
        input_pieces = pcre.split(r"(?<![^\\]\\)!", result)
        if len(input_pieces) >= 2:
            # Drop the leading backtick and mode character from the code.
            execute(result[1], input_pieces[0][2:], "!".join(input_pieces[1:]))
        else:
            execute(result[1], result[2:], get_input(input_str))
    else:
        print(unescape(result))