def test_bug_926075(self): try: unicode except NameError: return # no problem if we have no unicode self.assertTrue(re.compile('bug_926075') is not re.compile(eval("u'bug_926075'")))
def test_re_match(self): self.assertEqual(re.match('a', 'a').groups(), ()) self.assertEqual(re.match('(a)', 'a').groups(), ('a',)) self.assertEqual(re.match(r'(a)', 'a').group(0), 'a') self.assertEqual(re.match(r'(a)', 'a').group(1), 'a') self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a')) pat = re.compile('((a)|(b))(c)?') self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None)) self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c')) self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c')) self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c')) # A single group m = re.match('(a)', 'a') self.assertEqual(m.group(0), 'a') self.assertEqual(m.group(0), 'a') self.assertEqual(m.group(1), 'a') self.assertEqual(m.group(1, 1), ('a', 'a')) pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b', None)) self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
def test_empty_array(self): # SF buf 1647541 import array for typecode in 'cbBuhHiIlLfd': a = array.array(typecode) self.assertEqual(re.compile("bla").match(a), None) self.assertEqual(re.compile("").match(a).groups(), ())
def test_dollar_matches_twice(self): "$ matches the end of string, and just before the terminating \n" pattern = re.compile('$') self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') self.assertEqual(pattern.sub('#', '\n'), '#\n#') pattern = re.compile('$', re.MULTILINE) self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' ) self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') self.assertEqual(pattern.sub('#', '\n'), '#\n#')
def test_regex_compile(count): re._cache = EmptyCache() regexes = capture_regexes() times = [] for _ in xrange(count): t0 = time.time() for regex, flags in regexes: re.compile(regex, flags) t1 = time.time() times.append(t1 - t0) return times
def test_bug_1661(self): # Verify that flags do not get silently ignored with compiled patterns pattern = re.compile('.') self.assertRaises(ValueError, re.match, pattern, 'A', re.I) self.assertRaises(ValueError, re.search, pattern, 'A', re.I) self.assertRaises(ValueError, re.findall, pattern, 'A', re.I) self.assertRaises(ValueError, re.compile, pattern, re.I)
def test_bug_931848(self): try: unicode except NameError: pass pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"') self.assertEqual(re.compile(pattern).split("a.b.c"), ['a','b','c'])
def test_bug_581080(self): iter = re.finditer(r"\s", "a b") self.assertEqual(iter.next().span(), (1,2)) self.assertRaises(StopIteration, iter.next) scanner = re.compile(r"\s").scanner("a b") self.assertEqual(scanner.search().span(), (1, 2)) self.assertEqual(scanner.search(), None)
def test_bug_764548(self): # bug 764548, re.compile() barfs on str/unicode subclasses try: unicode except NameError: return # no problem if we have no unicode class my_unicode(unicode): pass pat = re.compile(my_unicode("abc")) self.assertEqual(pat.match("xyz"), None)
def test_inline_flags(self): # Bug #1700 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow p = re.compile(upper_char, re.I | re.U) q = p.match(lower_char) self.assertNotEqual(q, None) p = re.compile(lower_char, re.I | re.U) q = p.match(upper_char) self.assertNotEqual(q, None) p = re.compile('(?i)' + upper_char, re.U) q = p.match(lower_char) self.assertNotEqual(q, None) p = re.compile('(?i)' + lower_char, re.U) q = p.match(upper_char) self.assertNotEqual(q, None) p = re.compile('(?iu)' + upper_char) q = p.match(lower_char) self.assertNotEqual(q, None) p = re.compile('(?iu)' + lower_char) q = p.match(upper_char) self.assertNotEqual(q, None)
def test_re_groupref_exists(self): self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), ('(', 'a')) self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), (None, 'a')) self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None) self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None) self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), ('a', 'b')) self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), (None, 'd')) self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(), (None, 'd')) self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(), ('a', '')) # Tests for bug #1177831: exercise groups other than the first group p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))') self.assertEqual(p.match('abc').groups(), ('a', 'b', 'c')) self.assertEqual(p.match('ad').groups(), ('a', None, 'd')) self.assertEqual(p.match('abd'), None) self.assertEqual(p.match('ac'), None)
# Python imports import optparse import sys, os sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) import pcre_re as re import time # Local imports import util # These are the regular expressions to be tested. These sync up, # index-for-index with the list of strings generated by gen_string_table() # below. regexs = [ re.compile('Python|Perl'), re.compile('Python|Perl'), re.compile('(Python|Perl)'), re.compile('(?:Python|Perl)'), re.compile('Python'), re.compile('Python'), re.compile('.*Python'), re.compile('.*Python.*'), re.compile('.*(Python)'), re.compile('.*(?:Python)'), re.compile('Python|Perl|Tcl'), re.compile('Python|Perl|Tcl'), re.compile('(Python|Perl|Tcl)'), re.compile('(?:Python|Perl|Tcl)'), re.compile('(Python)\\1'), re.compile('(Python)\\1'),
def run_re_tests(): from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: print 'Running re_tests test suite' else: # To save time, only run the first and last 10 tests #tests = tests[:10] + tests[-10:] pass for t in tests: sys.stdout.flush() pattern = s = outcome = repl = expected = None if len(t) == 5: pattern, s, outcome, repl, expected = t elif len(t) == 3: pattern, s, outcome = t else: raise ValueError, ('Test tuples should have 3 or 5 fields', t) try: obj = re.compile(pattern) except re.error: if outcome == SYNTAX_ERROR: pass # Expected a syntax error else: print '=== Syntax error:', t except KeyboardInterrupt: raise KeyboardInterrupt except: print '*** Unexpected error ***', t if verbose: traceback.print_exc(file=sys.stdout) else: try: result = obj.search(s) except re.error, msg: print '=== Unexpected exception', t, repr(msg) if outcome == SYNTAX_ERROR: # This should have been a syntax error; forget it. pass elif outcome == FAIL: if result is None: pass # No match, as expected else: print '=== Succeeded incorrectly', t elif outcome == SUCCEED: if result is not None: # Matched, as expected, so now we compute the # result string and compare it to our expected result. start, end = result.span(0) vardict={'found': result.group(0), 'groups': result.group(), 'flags': result.re.flags} for i in range(1, 100): try: gi = result.group(i) # Special hack because else the string concat fails: if gi is None: gi = "None" except IndexError: gi = "Error" vardict['g%d' % i] = gi for i in result.re.groupindex.keys(): try: gi = result.group(i) if gi is None: gi = "None" except IndexError: gi = "Error" vardict[i] = gi repl = eval(repl, vardict) if repl != expected: print '=== grouping error', t, print repr(repl) + ' should be ' + repr(expected) else: print '=== Failed incorrectly', t # Try the match on a unicode string, and check that it # still succeeds. try: result = obj.search(unicode(s, "latin-1")) if result is None: print '=== Fails on unicode match', t except NameError: continue # 1.5.2 except TypeError: continue # unicode test case # Try the match on a unicode pattern, and check that it # still succeeds. obj=re.compile(unicode(pattern, "latin-1")) result = obj.search(s) if result is None: print '=== Fails on unicode pattern match', t # Try the match with the search area limited to the extent # of the match and see if it still succeeds. \B will # break (because it won't match at the end or start of a # string), so we'll ignore patterns that feature it. if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \ and result is not None: obj = re.compile(pattern) result = obj.search(s, result.start(0), result.end(0) + 1) if result is None: print '=== Failed on range-limited match', t # Try the match with IGNORECASE enabled, and check that it # still succeeds. obj = re.compile(pattern, re.IGNORECASE) result = obj.search(s) if result is None: print '=== Fails on case-insensitive match', t # Try the match with LOCALE enabled, and check that it # still succeeds. obj = re.compile(pattern, re.LOCALE) result = obj.search(s) if result is None: print '=== Fails on locale-sensitive match', t # Try the match with UNICODE locale enabled, and check # that it still succeeds. obj = re.compile(pattern, re.UNICODE) result = obj.search(s) if result is None: print '=== Fails on unicode-sensitive match', t
def test_bug_3629(self): # A regex that triggered a bug in the sre-code validator re.compile("(?P<quote>)(?(quote))")
def test_weakref(self): s = 'QabbbcR' x = re.compile('ab+c') y = proxy(x) self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
def pickle_test(self, pickle): oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') s = pickle.dumps(oldpat) newpat = pickle.loads(s) self.assertEqual(oldpat, newpat)
def test_flags(self): for flag in [re.I, re.M, re.X, re.S, re.L]: self.assertNotEqual(re.compile('^pattern$', flag), None)
def test_bug_612074(self): pat=u"["+re.escape(u"\u2039")+u"]" self.assertEqual(re.compile(pat) and 1, 1)