Esempio n. 1
0
 def test_re_escape_non_ascii_bytes(self):
     # U+2620 encodes to the three UTF-8 bytes \xe2 \x98 \xa0; the expected
     # value below has a backslash in front of each of those bytes.
     skull = u'\u2620'.encode('utf-8')
     haystack = u'y\u2620y\u2620y'.encode('utf-8')

     escaped = re.escape(haystack)
     self.assertEqual(escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
     # assertMatch is a helper of the enclosing test class (not shown here);
     # presumably it checks that the pattern matches the given text.
     self.assertMatch(escaped, haystack)

     # The escaped character must be found at both of its positions.
     occurrences = re.findall(re.escape(skull), haystack)
     self.assertEqual(len(occurrences), 2)
Esempio n. 2
0
 def test_re_escape_non_ascii(self):
     # Each non-ASCII character is expected to come back prefixed with a
     # backslash, while the ASCII letters are left alone.
     text = u'xxx\u2620\u2620\u2620xxx'
     expected = u'xxx\\\u2620\\\u2620\\\u2620xxx'

     escaped = re.escape(text)
     self.assertEqual(escaped, expected)
     self.assertMatch(escaped, text)

     # Embed the escaped character in a larger pattern and run it through
     # re.search; the extra arguments are forwarded to the assertMatch helper
     # defined on the enclosing class (not shown here).
     pattern = u'.%s+.' % re.escape(u'\u2620')
     self.assertMatch(pattern, text, u'x\u2620\u2620\u2620x', (2, 7),
                      re.search)
Esempio n. 3
0
 def test_re_escape_byte(self):
     # Walk over all 256 single-byte values and check how each is escaped.
     alnum = (string.ascii_letters + string.digits).encode('ascii')
     every_byte = ''.join(map(chr, range(256)))
     for ch in every_byte:
         if ch in alnum:
             # ASCII letters and digits pass through untouched.
             expected = ch
         elif ch == b'\x00':
             # NUL is expected as the octal escape \000.
             expected = b'\\000'
         else:
             # Everything else gets a single leading backslash.
             expected = b'\\' + ch
         escaped = re.escape(ch)
         self.assertEqual(escaped, expected)
         self.assertMatch(escaped, ch)
     # The escaped form of the whole range must also match the whole range.
     self.assertMatch(re.escape(every_byte), every_byte)
Esempio n. 4
0
 def test_re_escape(self):
     # Walk over the first 256 code points and check how each is escaped.
     alnum = string.ascii_letters + string.digits
     every_char = u''.join(map(unichr, range(256)))
     for ch in every_char:
         if ch in alnum:
             # ASCII letters and digits pass through untouched.
             expected = ch
         elif ch == u'\x00':
             # NUL is expected as the octal escape \000.
             expected = u'\\000'
         else:
             # Everything else gets a single leading backslash.
             expected = u'\\' + ch
         escaped = re.escape(ch)
         self.assertEqual(escaped, expected)
         self.assertMatch(escaped, ch)
     # The escaped form of the whole range must also match the whole range.
     self.assertMatch(re.escape(every_char), every_char)
Esempio n. 5
0
    def test_basic_re_sub(self):
        # Case-insensitive pattern given through an inline flag.
        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        # Callable replacement: bump_num is a helper on the enclosing test
        # class (not shown here); the expected strings show every matched
        # number replaced by its successor.
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                         '9.3 -3 24x100y')
        # Same, but with count=3 only the first three numbers change.
        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                         '9.3 -3 23x99y')

        # A callable's return value is used verbatim, whereas a template
        # string has its backslash escapes processed.
        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

        s = r"\1\1"
        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

        # Group references by name and by number.  These templates are raw
        # strings so that "\g" reaches the re module as written instead of
        # relying on Python keeping an unrecognised escape sequence intact.
        self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
        self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

        # Escapes in a template: the expected value shows the control
        # characters being expanded and the remaining escapes kept literally.
        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
        # Here the control characters are already expanded by the Python
        # literal (deliberately non-raw) and must pass through unchanged.
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

        # A pattern that matches the empty string at the start of the text.
        self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Esempio n. 6
0
 def test_bug_612074(self):
     # An escaped non-ASCII character placed inside a character class must
     # still compile to a usable (truthy) pattern object.
     char_class = u"[" + re.escape(u"\u2039") + u"]"
     compiled = re.compile(char_class)
     self.assertEqual(compiled and 1, 1)
Esempio n. 7
0
def _split_unescaped(delimiter, text):
  """Split *text* on *delimiter*, skipping delimiters guarded by a backslash."""
  # The lookbehind rejects a delimiter that directly follows a backslash which
  # itself follows a non-backslash character.
  return pcre.split(r"(?<![^\\]\\)" + delimiter, text)


def _build_table(code):
  """Parse ';'-separated rows of '&'-separated cells into a lookup dict."""
  table = {}
  for row in _split_unescaped(";", code):
    cells = _split_unescaped("&", row)
    # Every cell but the last is a key; the last cell is their shared value.
    table.update(dict.fromkeys(cells[:-1], cells[-1]))
  return table


def _apply_substitutions(subs, text):
  """Apply consecutive (pattern, replacement) pairs from *subs* to *text*."""
  # An odd number of entries raises IndexError on the dangling pattern.
  for index in range(0, len(subs), 2):
    text = pcre.sub(subs[index], subs[index + 1], text)
  return text


def _next_stage(pieces, fallback):
  """Return the backtick-prefixed remainder of *pieces*, or *fallback*."""
  if len(pieces) > 1:
    return "`" + "`".join(pieces[1:])
  return fallback


def _emit(text):
  """Print *text* after interpreting its backslash escape sequences."""
  # NOTE(review): the unicode-escape codec decodes the UTF-8 bytes one by one
  # as Latin-1, so non-ASCII characters are altered by this round trip.
  print(text.encode("utf-8").decode("unicode-escape"))


def execute(mode, code, input_str):
  """Run one stage of a program and chain into the next stage if one follows.

  ``pcre`` is used through ``split``/``sub``/``escape``/``DOTALL``, i.e. it is
  expected to offer an ``re``-compatible API.

  Args:
    mode: one-character stage selector:
      "l"  look ``input_str`` up in a table parsed from ``code``; the "?"
           row is the fallback.
      "f"  ``code % literal`` where the literal is parsed from ``input_str``.
      "F"  like "f", and ``input_str`` becomes the total length of the
           stringified value(s).
      "g"  print every string ``exrex.generate(code)`` yields, then stop.
      "h"  like "g", with the escaped input formatted into ``code`` first.
      "p"  repeat the text between "~" pairs; with a non-int literal, "~"
           pairs use element 0 and "%" pairs use element 1 as the count.
      "P"  repeat the single character in front of each "~".
      "e"  translate ``input_str`` character by character through the table.
      "o"  print the first backtick-separated piece of ``code``.
      "s"  apply pattern&replacement pairs to ``input_str``.
      "d"  delete every match of each "&"-separated pattern.
      "S"  like "s", but print the substituted text and keep ``input_str``.
      "i"  ``code + input_str``; "I" puts a newline between them.
      Any other value passes ``code`` through unchanged.
    code: the stage's program text.
    input_str: the stage's input text.

  Returns:
    None. Output is printed. A result starting with a backtick is executed as
    a further stage: its second character is the mode, and text after the
    first unguarded "!" is the input (otherwise ``get_input`` supplies it).

  Raises:
    KeyError: mode "l" misses and there is no "?" row, or mode "e" meets a
      character that has no row.
  """
  result = ""

  if mode == "l":
    table = _build_table(code)
    result = table[input_str] if input_str in table else table["?"]
  elif mode == "f":
    result = code % ast.literal_eval(input_str)
  elif mode == "F":
    literal = ast.literal_eval(input_str)
    result = code % literal
    if isinstance(literal, tuple):
      input_str = str(sum(len(str(item)) for item in literal))
    else:
      input_str = str(len(str(literal)))
  elif mode == "g":
    for generated in exrex.generate(code):
      _emit(generated)
    return # Generate is always terminal
  elif mode == "h":
    if isinstance(input_str, str):
      input_str = pcre.escape(input_str)
    for generated in exrex.generate(code % input_str):
      _emit(generated)
    return
  elif mode == "p":
    literal = ast.literal_eval(input_str)
    if isinstance(literal, int):
      result = pcre.sub(r"(?<![^\\]\\)~(.+?)(?<![^\\]\\)~", r"\1" * literal, code, flags=pcre.DOTALL)
    else:
      # NOTE(review): unlike every other pattern here, the "~" pass of this
      # branch has no backslash guard - confirm whether that is intended.
      tilde_expanded = pcre.sub(r"~(.+?)~", r"\1" * literal[0], code, flags=pcre.DOTALL)
      result = pcre.sub(r"(?<![^\\]\\)%(.+?)(?<![^\\]\\)%", r"\1" * literal[1], tilde_expanded, flags=pcre.DOTALL)
  elif mode == "P":
    result = pcre.sub(r"(.)(?<![^\\]\\)~", r"\1" * ast.literal_eval(input_str), code, flags=pcre.DOTALL)
  elif mode == "e":
    table = _build_table(code)
    # Translate per character; the previous code indexed with an unbound
    # name and could never run.
    result = "".join(table[char] for char in input_str)
  elif mode == "o":
    pieces = _split_unescaped("`", code)
    _emit(pieces[0])
    # Without a following stage the result must be empty, not a lone
    # backtick, otherwise the chaining step below indexes past the end.
    result = _next_stage(pieces, "")
  elif mode == "s":
    pieces = _split_unescaped("`", code)
    input_str = _apply_substitutions(_split_unescaped("&", pieces[0]), input_str)
    result = _next_stage(pieces, input_str)
  elif mode == "d":
    pieces = _split_unescaped("`", code)
    for pattern in _split_unescaped("&", pieces[0]):
      input_str = pcre.sub(pattern, "", input_str)
    result = _next_stage(pieces, input_str)
  elif mode == "S":
    pieces = _split_unescaped("`", code)
    output = _apply_substitutions(_split_unescaped("&", pieces[0]), input_str)
    result = _next_stage(pieces, "")
    _emit(output)
  elif mode == "i":
    result = code + input_str
  elif mode == "I":
    result = code + "\n" + input_str
  else:
    result = code

  if result.startswith("`"):
    input_pieces = _split_unescaped("!", result)
    if len(input_pieces) >= 2:
      # Inline input: everything after the first unguarded "!".
      execute(result[1], input_pieces[0][2:], "!".join(input_pieces[1:]))
    else:
      execute(result[1], result[2:], get_input(input_str))
  else:
    _emit(result)
Esempio n. 8
0
def execute(mode, code, input_str):
  """Run one stage of a program and chain into the next stage if one follows.

  Relies on helpers defined elsewhere in the file (``handle_table``,
  ``handle_subs``, ``handle_pieces``, ``unescape``, ``get_input``); their
  behaviour is described below only as far as the calls here show it.

  Args:
    mode: one-character stage selector ("l", "f", "F", "g", "h", "p", "P",
      "e", "o", "s", "d", "S", "i", "I"); any other value passes ``code``
      through unchanged.
    code: the stage's program text.
    input_str: the stage's input text.

  Returns:
    None. Output is printed. A result starting with a backtick is executed as
    a further stage: its second character is the mode, and text after the
    first unguarded "!" is the input (otherwise ``get_input`` supplies it).
  """
  result = ""

  if mode == "l":
    rows = (pcre.split(r"(?<![^\\]\\)&", row) for row in pcre.split(r"(?<![^\\]\\);", code))
    # presumably handle_table returns a mapping from keys to row values -
    # it is used as one here.
    table = handle_table(rows)

    # "?" is the fallback row for inputs that have no row of their own.
    if input_str in table:
      result = table[input_str]
    else:
      result = table["?"]
  elif mode == "f":
    result = code % ast.literal_eval(input_str)
  elif mode == "F":
    literal = ast.literal_eval(input_str)
    result = code % literal
    # The input for a following stage becomes the total length of the
    # stringified value(s).
    if isinstance(literal, tuple):
      input_str = str(sum((len(str(x)) for x in literal)))
    else:
      input_str = str(len(str(literal)))
  elif mode == "g":
    for string in exrex.generate(code):
      print(unescape(string))
    return # Generate is always terminal
  elif mode == "h":
    if isinstance(input_str, str):
      input_str = pcre.escape(input_str)
    for string in exrex.generate(code % input_str):
      print(unescape(string))
    return
  elif mode == "p":
    literal = ast.literal_eval(input_str)
    if isinstance(literal, int):
      result = pcre.sub(r"(?<![^\\]\\)~(.+?)(?<![^\\]\\)~",r"\1" * literal, code, flags=pcre.DOTALL)
    else:
      # NOTE(review): unlike every other pattern here, the inner "~" pass has
      # no backslash guard - confirm whether that is intended.
      result = pcre.sub(r"(?<![^\\]\\)%(.+?)(?<![^\\]\\)%",r"\1" * literal[1], pcre.sub(r"~(.+?)~",r"\1" * literal[0], code, flags=pcre.DOTALL), flags=pcre.DOTALL)
  elif mode == "P":
    result = pcre.sub(r"(.)(?<![^\\]\\)~",r"\1" * ast.literal_eval(input_str), code, flags=pcre.DOTALL)
  elif mode == "e":
    rows = (pcre.split(r"(?<![^\\]\\)&", row) for row in pcre.split(r"(?<![^\\]\\);", code))
    table = handle_table(rows)

    # Translate the input character by character; the previous code iterated
    # over and indexed with an undefined name instead.
    result = "".join(table[char] for char in input_str)
  elif mode == "o":
    pieces = pcre.split(r"(?<![^\\]\\)`", code)
    print(unescape(pieces[0]))
    result = handle_pieces(pieces[1:], "")
  elif mode == "s":
    pieces = pcre.split(r"(?<![^\\]\\)`", code)
    subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])

    input_str = handle_subs(input_str, subs)
    result = handle_pieces(pieces[1:], input_str)
  elif mode == "d":
    pieces = pcre.split(r"(?<![^\\]\\)`", code)
    subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])

    # Delete every match of each pattern in turn.
    for sub in subs:
      input_str = pcre.sub(sub, "", input_str)

    result = handle_pieces(pieces[1:], input_str)
  elif mode == "S":
    pieces = pcre.split(r"(?<![^\\]\\)`", code)
    subs = pcre.split(r"(?<![^\\]\\)&", pieces[0])

    # Unlike "s", the substituted text is printed and input_str is kept.
    output = unescape(handle_subs(input_str, subs))

    result = handle_pieces(pieces[1:], "")
    print(output)
  elif mode == "i":
    result = code + input_str
  elif mode == "I":
    result = code + "\n" + input_str
  else:
    result = code

  if len(result) > 0 and result[0] == "`":
    input_pieces = pcre.split(r"(?<![^\\]\\)!", result)
    if len(input_pieces) >= 2:
      # Inline input: everything after the first unguarded "!".
      execute(result[1], input_pieces[0][2:], "!".join(input_pieces[1:]))
    else:
      execute(result[1], result[2:], get_input(input_str))
  else:
    print(unescape(result))