def test_not_literal(self):
    """Negated one-character set ``[^a]``, alone and repeated to end of input."""
    single = get_code(r"[^a]")
    assert rsre_core.match(single, "A")
    assert not rsre_core.match(single, "a")

    repeated = get_code(r"[^a]+$")
    assert rsre_core.match(repeated, "Bx123")
    # One 'a' in the middle is enough to make the anchored run fail.
    assert not rsre_core.match(repeated, "--a--")
def test_not_literal_ignore(self):
    """Case-insensitive ``[^a]``: both 'a' and 'A' must be rejected."""
    single = get_code(r"(?i)[^a]")
    assert rsre_core.match(single, "G")
    for excluded in ("a", "A"):
        assert not rsre_core.match(single, excluded)

    repeated = get_code(r"(?i)[^a]+$")
    assert rsre_core.match(repeated, "Gx123")
    assert not rsre_core.match(repeated, "--A--")
def test_in_ignore(self):
    """Case-insensitive range set ``[a-f]``, single and repeated to the end."""
    one_char = get_code(r"(?i)[a-f]")
    for hit in ("b", "C"):
        assert rsre_core.match(one_char, hit)
    assert not rsre_core.match(one_char, "g")

    run = get_code(r"(?i)[a-f]+$")
    assert rsre_core.match(run, "bCdEf")
    for miss in ("g", "aaagaaa"):
        assert not rsre_core.match(run, miss)
def test_match_start(self):
    """``^`` combined with a non-zero ``start``, without and with ``(?m)``."""
    plain = get_code(r"^ab")
    assert rsre_core.match(plain, "abc")
    assert not rsre_core.match(plain, "xxxabc", start=3)
    assert not rsre_core.match(plain, "xx\nabc", start=3)

    # Same inputs in multiline mode: only the case where position 3
    # directly follows a newline is expected to match.
    multiline = get_code(r"(?m)^ab")
    assert rsre_core.match(multiline, "abc")
    assert not rsre_core.match(multiline, "xxxabc", start=3)
    assert rsre_core.match(multiline, "xx\nabc", start=3)
def test_groupref_exists(self):
    """Conditional group ``(?(2)...)``, without and with an else branch."""
    without_else = get_code(r"((a)|(b))c(?(2)d)$")
    expectations = [
        ("ac", False),
        ("acd", True),
        ("bc", True),
        ("bcd", False),
    ]
    for text, should_match in expectations:
        assert bool(rsre_core.match(without_else, text)) == should_match

    with_else = get_code(r"((a)|(b))c(?(2)d|e)$")
    expectations = [
        ("ac", False),
        ("acd", True),
        ("ace", False),
        ("bc", False),
        ("bcd", False),
        ("bce", True),
    ]
    for text, should_match in expectations:
        assert bool(rsre_core.match(with_else, text)) == should_match
def test_range_ignore(self):
    """Patch the compiled RANGE opcode into RANGE_IGNORE, then match."""
    from rpython.rlib.unicodedata import unicodedb

    # Numeric opcode values, named as in the original inline comments.
    opcode_range = 27
    opcode_range_ignore = 32

    rsre_char.set_unicode_db(unicodedb)

    compiled = get_code(u"[\U00010428-\U0001044f]", re.I)
    assert compiled.pattern.count(opcode_range) == 1
    # Rewrite the single RANGE opcode in place.
    position = compiled.pattern.index(opcode_range)
    compiled.pattern[position] = opcode_range_ignore
    assert rsre_core.match(compiled, u"\U00010428")
def test_lookbehind(self):
    """Positive lookbehind ``(?<=de)`` after a greedy ``[a-z]*`` group."""
    code = get_code(r"([a-z]*)(?<=de)")
    assert rsre_core.match(code, "ade")

    # The group has to end right after "de", i.e. at mark position 3.
    result = rsre_core.match(code, "adefg")
    assert result is not None
    assert result.get_mark(1) == 3

    for rejected in ("abc", "X", "eX"):
        assert not rsre_core.match(code, rejected)
def test_assert_not_group(self):
    """Span of a group that sits inside a negative lookahead."""
    code = get_code(r"abc(?!(de)f)(.)")
    result = rsre_core.match(code, "abcdeFghi")
    assert result is not None
    assert result.span(2) == (3, 4)
    # What group 1 reports here is Horrendously Implementation Dependent:
    # CPython answers (3, 5), while this engine leaves the group unset.
    assert result.span(1) == (-1, -1)
def __init__(self, name, pattern, flags=0, transition=None, target=None):
    """Store the rule attributes and compile ``pattern`` with ``re``.

    When ``rpython`` is truthy, ``flags`` and the ``get_code`` result for
    the pattern are stored as well.
    """
    self.name = name
    self.re = re.compile(pattern, flags=flags)
    self.transition = transition
    self.target = target
    if not rpython:
        return
    self.flags = flags
    self._pattern = get_code(pattern, flags)
def test_range_ignore(self):
    """Patch the compiled RANGE opcode into RANGE_IGNORE, then match."""
    from rpython.rlib.unicodedata import unicodedb

    # Numeric opcode values, named as in the original inline comments.
    opcode_range = 27
    opcode_range_ignore = 32

    rsre_char.set_unicode_db(unicodedb)

    opcodes = get_code(u"[\U00010428-\U0001044f]", re.I)
    assert opcodes.count(opcode_range) == 1
    # Rewrite the single RANGE opcode in place.
    position = opcodes.index(opcode_range)
    opcodes[position] = opcode_range_ignore
    assert rsre_core.match(opcodes, u"\U00010428")
def test_match_end(self):
    """Matching "ab" against "abc" under various ``end`` limits."""
    code = get_code("ab")
    assert rsre_core.match(code, "abc")
    # Limits that still leave room for both characters.
    for limit in (333, 3, 2):
        assert rsre_core.match(code, "abc", end=limit)
    # Limits that cut the subject off before "ab" is complete.
    for limit in (1, 0, -1):
        assert not rsre_core.match(code, "abc", end=limit)
def convert_const(self, rule):
    """Return the low-level structure for ``rule``, building it on first use.

    The result is memoised in ``self.ll_rule_cache``, keyed by ``rule``.
    """
    cache = self.ll_rule_cache
    if rule in cache:
        return cache[rule]

    rule_struct = self.lowleveltype.TO
    ll_rule = lltype.malloc(rule_struct)
    ll_rule.name = llstr(rule.name)
    # NOTE(review): only the pattern text is passed to get_code here, no
    # flags -- confirm that this is intended.
    opcodes = get_code(rule.re.pattern)
    ll_rule.code = lltype.malloc(rule_struct.code.TO, len(opcodes))
    for position, opcode in enumerate(opcodes):
        ll_rule.code[position] = opcode
    cache[rule] = ll_rule
    return cache[rule]
def test_groupref(self):
    """Back-reference pattern that matches non-prime numbers of 'x'."""
    code = get_code(r"(xx+)\1+$")
    expectations = [
        ("xx", False),
        ("xxx", False),
        ("xxxx", True),
        ("xxxxx", False),
        ("xxxxxx", True),
        ("xxxxxxx", False),
        ("xxxxxxxx", True),
        ("xxxxxxxxx", True),
    ]
    for text, should_match in expectations:
        assert bool(rsre_core.match(code, text)) == should_match
def test_groupref_ignore(self):
    """Case-insensitive back-reference matching non-prime numbers of 'x'."""
    code = get_code(r"(?i)(xx+)\1+$")
    expectations = [
        ("xX", False),
        ("xxX", False),
        ("Xxxx", True),
        ("xxxXx", False),
        ("xXxxxx", True),
        ("xxxXxxx", False),
        ("xxxxxxXx", True),
        ("xxxXxxxxx", True),
    ]
    for text, should_match in expectations:
        assert bool(rsre_core.match(code, text)) == should_match
def test_negative_lookbehind(self):
    """Negative lookbehind ``(?<!dd)`` limits where the group may end."""
    code = get_code(r"([a-z]*)(?<!dd)")

    def group_end(text):
        # The pattern must match; report mark 1 of the result.
        result = rsre_core.match(code, text)
        assert result is not None
        return result.get_mark(1)

    expected_marks = [
        ("ade", 3),
        ("adefg", 5),
        ("abcdd", 4),
        ("abddd", 3),
        ("adddd", 2),
        ("ddddd", 1),
        ("abXde", 2),
    ]
    for text, mark in expected_marks:
        assert group_end(text) == mark
def test_negative_lookbehind(self):
    """Negative lookbehind ``(?<!dd)`` limits where the group may end."""
    code = get_code(r"([a-z]*)(?<!dd)")

    def group_end(text):
        # The pattern must match; report mark 1 of the result.
        result = match(code, text)
        assert result is not None
        return result.get_mark(1)

    expected_marks = [
        ("ade", 3),
        ("adefg", 5),
        ("abcdd", 4),
        ("abddd", 3),
        ("adddd", 2),
        ("ddddd", 1),
        ("abXde", 2),
    ]
    for text, mark in expected_marks:
        # Expected positions are wrapped with P(), as in the sibling tests.
        assert group_end(text) == P(mark)
def test_bigcharset(self):
    """Randomised check of sets built from characters in 0x100..0xCFFF."""
    for _round in range(100):
        # Draw the set size first, then the members.
        size = random.randrange(1, 25)
        members = []
        for _ in range(size):
            members.append(unichr(random.randrange(0x100, 0xD000)))
        code = get_code(u'[%s]' % (u''.join(members),))

        # Every member of the set must match on its own.
        for member in members:
            assert rsre_core.match(code, member)

        # Random probes match exactly when they belong to the set.
        for _probe in range(200):
            candidate = unichr(random.randrange(0x0, 0xD000))
            outcome = rsre_core.match(code, candidate)
            if candidate not in members:
                assert outcome is None
            else:
                assert outcome is not None
def test_bigcharset(self):
    """Randomised check of sets built from characters in 0x100..0xCFFF."""

    def random_char(low):
        # One random character with code point in [low, 0xD000).
        return unichr(random.randrange(low, 0xD000))

    for _round in range(100):
        # The set size is drawn before any of the members.
        how_many = random.randrange(1, 25)
        charset = [random_char(0x100) for _ in range(how_many)]
        code = get_code(u'[%s]' % (u''.join(charset), ))

        # Every member of the set must match on its own.
        for member in charset:
            assert rsre_core.match(code, member)

        # Random probes match exactly when they belong to the set.
        for _probe in range(200):
            probe = random_char(0x0)
            outcome = rsre_core.match(code, probe)
            if probe in charset:
                assert outcome is not None
            else:
                assert outcome is None
def test_bug1(self):
    """Nested repetition patterns must still find a match."""
    cases = [
        # REPEAT_ONE inside REPEAT
        (r"(?:.+)?B", "AB"),
        (r"(?:AA+?)+B", "AAAB"),
        (r"(?:AA+)+?B", "AAAB"),
        (r"(?:AA+?)+?B", "AAAB"),
        # REPEAT inside REPEAT
        (r"(?:(?:xy)+)?B", "xyB"),
        (r"(?:xy(?:xy)+?)+B", "xyxyxyB"),
        (r"(?:xy(?:xy)+)+?B", "xyxyxyB"),
        (r"(?:xy(?:xy)+?)+?B", "xyxyxyB"),
    ]
    for regex, subject in cases:
        code = get_code(regex)
        assert rsre_core.match(code, subject) is not None
def test_fullmatch_4(self):
    """fullmatch with a repeated inner group captures the whole repetition."""
    r = get_code(r"a((bp)*)c")
    match = rsre_core.fullmatch(r, "abpbpbpc")
    # Fail with a clear assertion, not an AttributeError, if nothing matched.
    assert match is not None
    assert match.group(1) == "bpbpbp"
def test_fullmatch_1(self):
    """fullmatch rejects trailing text that a prefix match would ignore."""
    code = get_code(r"ab*c")
    with_trailing_text = rsre_core.fullmatch(code, "abbbcdef")
    assert not with_trailing_text
    exact = rsre_core.fullmatch(code, "abbbc")
    assert exact
def test_fullmatch_2(self):
    """fullmatch makes a lazy ``b*?`` group expand to cover the whole input."""
    r = get_code(r"a(b*?)")
    match = rsre_core.fullmatch(r, "abbb")
    # Fail with a clear assertion, not an AttributeError, if nothing matched.
    assert match is not None
    assert match.group(1) == "bbb"
    assert not rsre_core.fullmatch(r, "abbbc")
def test_repeated_set(self):
    """A repeated set ``[a0x]+`` followed by a literal 'f'."""
    code = get_code(r"[a0x]+f")
    accepted = rsre_core.match(code, "a0af")
    assert accepted
    # 'y' is not in the set, so the run stops before an 'f' is reached.
    rejected = rsre_core.match(code, "a0yaf")
    assert not rejected
def test_empty_maxuntil(self):
    """A repeated lazy ``.*?`` between double braces captures only "a"."""
    r = get_code("\\{\\{((?:.*?)+)\\}\\}")
    match = rsre_core.match(r, "{{a}}{{b}}")
    # Fail with a clear assertion, not an AttributeError, if nothing matched.
    assert match is not None
    assert match.group(1) == "a"
def test_any_all(self):
    """With ``(?s)`` the dot also matches a newline."""
    code = get_code(r"(?s)ab.cd")
    for accepted in ("abXcdef", "ab\ncdef"):
        assert rsre_core.match(code, accepted)
    # The literal part stays case-sensitive.
    assert not rsre_core.match(code, "ab\ncDef")
def test_any_all_repetition(self):
    """With ``(?s)`` a repeated dot also runs across newlines."""
    code = get_code(r"(?s)ab.*cd")
    for accepted in ("abXXXXcdef", "abcdef", "abX\nXcdef"):
        assert rsre_core.match(code, accepted)
    # The literal part stays case-sensitive.
    assert not rsre_core.match(code, "abX\nXcDef")
def get_code_and_re(regexp):
    """Return ``(get_code(regexp), re.compile(regexp))`` for one pattern."""
    code = get_code(regexp)
    compiled = re.compile(regexp)
    return code, compiled
def test_or(self):
    """Alternation ``a|bc|def`` accepts each branch and nothing else."""
    code = get_code(r"a|bc|def")
    for branch in ("a", "bc", "def"):
        assert rsre_core.match(code, branch)
    assert not rsre_core.match(code, "ghij")
def test_match_bug3(self):
    """Regression test for ``([ax]*?x*)?$``; skipped on VERSION 2.7.5."""
    known_bad_version = "2.7.5"
    if VERSION == known_bad_version:
        reason = ("pattern fails to compile with exactly 2.7.5 "
                  "(works on 2.7.3 and on 2.7.trunk though)")
        py.test.skip(reason)
    code = get_code(r'([ax]*?x*)?$')
    assert rsre_core.match(code, "aaxaa")
def test_match_bug2(self):
    """Regression test: nested lazy optionals ``(x??)??$`` must match "x"."""
    code = get_code(r'(x??)??$')
    result = rsre_core.match(code, "x")
    assert result
def test_assert_not(self):
    """Negative lookahead ``(?!def)`` after the literal "abc"."""
    code = get_code(r"abc(?!def)(.)")

    # "deF" differs from "def", so the lookahead lets the match through.
    result = rsre_core.match(code, "abcdeFghi")
    assert result is not None
    assert result.get_mark(1) == 4

    assert not rsre_core.match(code, "abcdefghi")
def test_simple_match_1(self):
    """A greedy ``b*`` must give back characters for the literal b's after it."""
    r = get_code(r"ab*bbbbbbbc")
    # Debug output of the compiled code. The parenthesised form prints the
    # same on Python 2 and is also valid Python 3 syntax.
    print(r)
    match = rsre_core.match(r, "abbbbbbbbbcdef")
    # Explicit None check so a failed match is reported unambiguously.
    assert match is not None
    assert match
    assert match.match_end == 11
def test_fullmatch_assertion(self):
    """fullmatch combined with positive and negative lookahead."""
    positive = get_code(r"(?=a).b")
    assert rsre_core.fullmatch(positive, "ab")

    negative = get_code(r"(?!a)..")
    assert not rsre_core.fullmatch(negative, "ab")
def test_at(self):
    """End anchor ``$`` after the literal "abc"."""
    code = get_code(r"abc$")
    assert rsre_core.match(code, "abc")
    for rejected in ("abcd", "ab"):
        assert not rsre_core.match(code, rejected)
def test_repeated_single_character_pattern(self):
    """A repeated group with a lookbehind plus one character matches "foox"."""
    code = get_code(r"foo(?:(?<=foo)x)+$")
    result = rsre_core.match(code, "foox")
    assert result
def test_flatten_marks(self):
    """flatten_marks() gives the expected list on two consecutive calls."""
    code = get_code(r"a(b)c((d)(e))+$")
    result = rsre_core.match(code, "abcdedede")
    expected = [0, 9, 1, 2, 7, 9, 7, 8, 8, 9]
    # Deliberately called twice; presumably this checks that a repeated
    # call returns the same answer.
    for _attempt in range(2):
        assert result.flatten_marks() == expected
def test_assert_group(self):
    """A group captured inside a positive lookahead keeps its span."""
    code = get_code(r"abc(?=(..)f)(.)")
    result = rsre_core.match(code, "abcdefghi")
    assert result is not None
    # Group 2 consumes one character; group 1 was set inside the lookahead.
    consumed_span = result.span(2)
    assert consumed_span == (3, 4)
    lookahead_span = result.span(1)
    assert lookahead_span == (3, 5)
def test_get_code_repetition():
    """Compiling the same pattern twice gives equal results."""
    first, second = get_code(r"a+"), get_code(r"a+")
    assert first == second
def test_get_code_repetition():
    """Compiling the same pattern twice gives equal ``.pattern`` values."""
    first, second = get_code(r"a+"), get_code(r"a+")
    assert first.pattern == second.pattern
def test_category(self):
    """A set mixing a whitespace category escape with a literal 'x'."""
    code = get_code(r"[\sx]")
    for accepted in ("x", " "):
        assert rsre_core.match(code, accepted)
    # A plain 'n' is neither whitespace nor 'x'.
    assert not rsre_core.match(code, "n")