def test_min_repeat_one_with_backref():
    """Match a MIN_REPEAT_ONE wrapping a GROUPREF against "aaab".

    The match is expected to end at offset 4.
    """
    # Python 3.5 compiles "(.)\1*?b" using MIN_REPEAT_ONE
    opcodes = [
        # group 0 capturing any single character
        MARK, 0, ANY, MARK, 1,
        # lazy repetition of a back-reference to group 0
        MIN_REPEAT_ONE, 6, 0, MAXREPEAT, GROUPREF, 0, SUCCESS,
        # literal chr(98) == "b", then end of pattern
        LITERAL, 98, SUCCESS,
    ]
    compiled = rsre_core.CompiledPattern(opcodes)
    result = rsre_core.match(compiled, "aaab")
    assert result.match_end == 4
def SRE_Pattern__new__(space, w_subtype, w_pattern, flags, w_code,
                       groups=0, w_groupindex=None, w_indexgroup=None):
    """Allocate and initialise a W_SRE_Pattern instance of type w_subtype.

    Every item of ``w_code`` is unwrapped with ``space.uint_w`` and passed
    through ``intmask``; the resulting list is wrapped, together with
    ``flags``, in a ``rsre_core.CompiledPattern`` and stored on the new
    object as ``code``.  The remaining arguments are stored unchanged.

    Returns the wrapped instance obtained from ``space.allocate_instance``.
    """
    length = space.len_w(w_code)
    # Kept as a list comprehension on purpose (same construct as before).
    opcodes = [
        intmask(space.uint_w(space.getitem(w_code, space.newint(index))))
        for index in range(length)
    ]

    w_result = space.allocate_instance(W_SRE_Pattern, w_subtype)
    pat = space.interp_w(W_SRE_Pattern, w_result)

    # note: we assume that the app-level is caching SRE_Pattern objects,
    # so that we don't need to do it here.  Creating new SRE_Pattern
    # objects all the time would be bad for the JIT, which relies on the
    # identity of the CompiledPattern() object.
    compiled = rsre_core.CompiledPattern(opcodes, flags)

    pat.space = space
    pat.w_pattern = w_pattern      # the original uncompiled pattern
    pat.flags = flags
    pat.code = compiled
    pat.num_groups = groups
    pat.w_groupindex = w_groupindex
    pat.w_indexgroup = w_indexgroup
    return w_result
def test_repeat_one_with_backref():
    """Match a REPEAT_ONE wrapping a GROUPREF against "aaa".

    The match is expected to end at offset 3.
    """
    # Python 3.5 compiles "(.)\1*" using REPEAT_ONE instead of REPEAT:
    # it's a valid optimization because \1 is always one character long
    opcodes = [
        # group 0 capturing any single character
        MARK, 0, ANY, MARK, 1,
        # greedy repetition of a back-reference to group 0
        REPEAT_ONE, 6, 0, MAXREPEAT, GROUPREF, 0, SUCCESS,
        # end of pattern
        SUCCESS,
    ]
    compiled = rsre_core.CompiledPattern(opcodes)
    result = rsre_core.match(compiled, "aaa")
    assert result.match_end == 3
def entrypoint2(r, string, repeat):
    """Run ``rsre_core.search`` ``repeat`` times and report the last result.

    ``r`` is converted with ``array2list`` and wrapped in a
    ``CompiledPattern`` (flags 0); ``string`` is converted with ``hlstr``.

    Returns ``match_start`` of the last search, or -1 when the last search
    found nothing (which includes the case where ``repeat`` is 0 and no
    search was run at all).
    """
    compiled = rsre_core.CompiledPattern(array2list(r), 0)
    subject = hlstr(string)

    last_match = None
    for _ in range(repeat):
        last_match = rsre_core.search(compiled, subject)

    if last_match is not None:
        return last_match.match_start
    return -1
def main(n):
    """Exercise both the byte-string and the unicode search paths.

    Builds a list of ``n`` copies of ``n`` as the pattern, then searches a
    byte string and a unicode string, each made of ``n`` copies of the
    character with code ``n``.  Always returns 0; the search results are
    discarded.
    """
    assert n >= 0
    opcodes = [n] * n
    bytestr = chr(n) * n
    # NOTE(review): this call receives the raw opcode list, while the
    # unicode path below wraps it in CompiledPattern first -- confirm
    # that rsre_core.search() really accepts a plain list here.
    rsre_core.search(opcodes, bytestr)
    #
    text = unichr(n) * n
    compiled = rsre_core.CompiledPattern(opcodes)
    context = rsre_core.UnicodeMatchContext(compiled, text,
                                            0, len(text), 0)
    rsre_core.search_context(context)
    #
    return 0
def regexp_match(cache, re, string):
    """Compile ``re`` via ``regexp.compile`` and prepare a match over ``string``.

    Returns a 2-tuple ``(ctx, pattern)``: a ``StrMatchContext`` spanning
    the whole of ``string`` (from 0 to ``len(string)``) built with the
    compiled flags, and a ``CompiledPattern`` wrapping the compiled code.
    """
    end = len(string)
    # Only the first two of the six values returned by compile are used.
    code, flags, _, _, _, _ = regexp.compile(cache, re)
    ctx = rsre_core.StrMatchContext(string, 0, end, flags)
    pattern = rsre_core.CompiledPattern(code)
    return ctx, pattern
def make_ctx(self, s, offset=0):
    """Build a match context over ``s`` starting at ``offset``.

    ``offset`` must be non-negative.  Returns a 2-tuple
    ``(ctx, pattern)``: a ``StrMatchContext`` covering ``s`` from
    ``offset`` to ``len(s)`` built with ``self.flags``, and a
    ``CompiledPattern`` wrapping ``self.code``.
    """
    assert offset >= 0
    end = len(s)
    ctx = rsre_core.StrMatchContext(s, offset, end, self.flags)
    pattern = rsre_core.CompiledPattern(self.code)
    return ctx, pattern
def compile(pattern, flags, code, *args):
    """Hand the compiled pattern back to the caller by raising ``GotIt``.

    This function never returns normally: it always raises
    ``GotIt(compiled, flags, args)``, where ``compiled`` is a
    ``CompiledPattern`` built from ``code`` with every item passed through
    ``intmask``.  When running untranslated with a ``unicode`` pattern,
    ``SRE_FLAG_UNICODE`` is added to ``flags`` first.
    """
    if not we_are_translated():
        if isinstance(pattern, unicode):
            flags |= rsre_constants.SRE_FLAG_UNICODE   # for rsre_re.py
    opcodes = [intmask(op) for op in code]
    compiled = rsre_core.CompiledPattern(opcodes, flags)
    raise GotIt(compiled, flags, args)
def check_charset(pattern, idx, char):
    """Wrap ``pattern`` and delegate to ``rsre_char.check_charset``.

    ``pattern`` is wrapped in a ``CompiledPattern``, a ``Ctx`` is built
    around it, and both are forwarded together with ``idx`` and ``char``.
    Returns whatever ``rsre_char.check_charset`` returns.
    """
    compiled = rsre_core.CompiledPattern(pattern)
    context = Ctx(compiled)
    return rsre_char.check_charset(context, compiled, idx, char)