def test_range_ignore(self):
    """Match a character class after patching RANGE into RANGE_IGNORE.

    Compiles a one-range character class with re.I, checks that the
    compiled pattern holds exactly one opcode 27, overwrites it in place
    with opcode 32, and asserts that the first character of the range
    still matches.
    """
    from rpython.rlib.unicodedata import unicodedb
    rsre_char.set_unicode_db(unicodedb)

    range_opcode = 27          # OPCODE_RANGE
    range_ignore_opcode = 32   # OPCODE_RANGE_IGNORE

    compiled = get_code(u"[\U00010428-\U0001044f]", re.I)
    opcodes = compiled.pattern
    assert opcodes.count(range_opcode) == 1
    # Swap the single RANGE opcode for its case-ignoring variant.
    position = opcodes.index(range_opcode)
    opcodes[position] = range_ignore_opcode
    assert rsre_core.match(compiled, u"\U00010428")
def test_range_ignore(self):
    """Match a character class after patching RANGE into RANGE_IGNORE.

    Here get_code() yields the opcode sequence directly: it must contain
    exactly one opcode 27, which is overwritten in place with opcode 32
    before asserting that the first character of the range still matches.
    """
    from rpython.rlib.unicodedata import unicodedb
    rsre_char.set_unicode_db(unicodedb)

    range_opcode = 27          # OPCODE_RANGE
    range_ignore_opcode = 32   # OPCODE_RANGE_IGNORE

    opcodes = get_code(u"[\U00010428-\U0001044f]", re.I)
    assert opcodes.count(range_opcode) == 1
    # Swap the single RANGE opcode for its case-ignoring variant.
    position = opcodes.index(range_opcode)
    opcodes[position] = range_ignore_opcode
    assert rsre_core.match(opcodes, u"\U00010428")
def test_getupper_getlower_unicode_ascii_shortcut():
    """Call getlower/getupper on codes 0..127 with no unicode db set.

    The unicode database is set to None for the duration of the loop; the
    test passes as long as none of the calls raises.  Afterwards
    rpython.rlib.unicodedata.unicodedb is installed again, whether or not
    the loop succeeded.
    """
    from rpython.rlib.unicodedata import unicodedb as db_to_reinstall
    try:
        rsre_char.set_unicode_db(None)
        # works despite not having a unicode db
        for ascii_code in range(128):
            rsre_char.getlower(ascii_code, SRE_FLAG_UNICODE)
            rsre_char.getupper(ascii_code, SRE_FLAG_UNICODE)
    finally:
        rsre_char.set_unicode_db(db_to_reinstall)
from rpython.rlib.rsre import rsre_core, rsre_utf8
from rpython.rlib.rsre.rsre_char import (
    CODESIZE, MAXREPEAT, getlower, set_unicode_db)


@unwrap_spec(char_ord=int, flags=int)
def w_getlower(space, char_ord, flags):
    """Return getlower(char_ord, flags) wrapped with space.newint()."""
    lowered = getlower(char_ord, flags)
    return space.newint(lowered)


def w_getcodesize(space):
    """Return the CODESIZE constant wrapped with space.newint()."""
    w_size = space.newint(CODESIZE)
    return w_size


# use the same version of unicodedb as the standard objspace
import pypy.objspace.std.unicodeobject
set_unicode_db(pypy.objspace.std.unicodeobject.unicodedb)

# ____________________________________________________________
#

class UnicodeAsciiMatchContext(rsre_core.StrMatchContext):
    # we make a subclass just to mark that it originates from a W_UnicodeObject
    pass


def slice_w(space, ctx, start, end, w_default):
    # 'start' and 'end' are byte positions
    if ctx.ZERO <= start <= end:
        if isinstance(ctx, rsre_core.BufMatchContext):
            return space.newbytes(ctx._buffer.getslice(start, 1, end - start))
from rpython.rlib.rsre import rsre_core from rpython.rlib.rsre.rsre_char import CODESIZE, MAXREPEAT, getlower, set_unicode_db @unwrap_spec(char_ord=int, flags=int) def w_getlower(space, char_ord, flags): return space.wrap(getlower(char_ord, flags)) def w_getcodesize(space): return space.wrap(CODESIZE) # use the same version of unicodedb as the standard objspace import pypy.objspace.std.unicodeobject set_unicode_db(pypy.objspace.std.unicodeobject.unicodedb) # ____________________________________________________________ # def slice_w(space, ctx, start, end, w_default): if 0 <= start <= end: if isinstance(ctx, rsre_core.BufMatchContext): return space.newbytes(ctx._buffer.getslice(start, end, 1, end-start)) if isinstance(ctx, rsre_core.StrMatchContext): return space.newbytes(ctx._string[start:end]) elif isinstance(ctx, rsre_core.UnicodeMatchContext): return space.newunicode(ctx._unicodestr[start:end]) else:
def setup_module(mod):
    """Install rpython.rlib.unicodedata.unicodedb as the rsre unicode db.

    `mod` is accepted for the hook signature but is not used here.
    """
    from rpython.rlib.unicodedata import unicodedb as default_db
    rsre_char.set_unicode_db(default_db)
"""
This is not used in a PyPy translation, but it can be used
in RPython code.  It exports the same interface as the
Python 're' module.  You can call the functions at the start
of the module (except the ones with @not_rpython for now).
They must be called with a *constant* pattern string.
"""
import re, sys
from rpython.rlib.rsre import rsre_core, rsre_char
from rpython.rlib.rsre.rpy import get_code as _get_code
from rpython.rlib.unicodedata import unicodedb
from rpython.rlib.objectmodel import specialize, we_are_translated
from rpython.rlib.objectmodel import not_rpython
rsre_char.set_unicode_db(unicodedb)


I = IGNORECASE = re.I   # ignore case
L = LOCALE     = re.L   # assume current 8-bit locale
U = UNICODE    = re.U   # assume unicode locale
M = MULTILINE  = re.M   # make anchors look for newline
S = DOTALL     = re.S   # make dot match newline
X = VERBOSE    = re.X   # ignore whitespace and comments


@specialize.call_location()
def match(pattern, string, flags=0):
    """Compile `pattern` with `flags` and return its .match(string)."""
    compiled = compile(pattern, flags)
    return compiled.match(string)


@specialize.call_location()
def search(pattern, string, flags=0):
    """Compile `pattern` with `flags` and return its .search(string)."""
    compiled = compile(pattern, flags)
    return compiled.search(string)
"""
This is not used in a PyPy translation, but it can be used
in RPython code.  It exports the same interface as the
Python 're' module.  You can call the functions at the start
of the module (except the ones with NOT_RPYTHON for now).
They must be called with a *constant* pattern string.
"""
import re, sys
from rpython.rlib.rsre import rsre_core, rsre_char
from rpython.rlib.rsre.rpy import get_code as _get_code
from rpython.rlib.unicodedata import unicodedb
from rpython.rlib.objectmodel import specialize, we_are_translated
rsre_char.set_unicode_db(unicodedb)


I = IGNORECASE = re.I   # ignore case
L = LOCALE     = re.L   # assume current 8-bit locale
U = UNICODE    = re.U   # assume unicode locale
M = MULTILINE  = re.M   # make anchors look for newline
S = DOTALL     = re.S   # make dot match newline
X = VERBOSE    = re.X   # ignore whitespace and comments


@specialize.call_location()
def match(pattern, string, flags=0):
    """Compile `pattern` with `flags` and return its .match(string)."""
    compiled = compile(pattern, flags)
    return compiled.match(string)


@specialize.call_location()
def search(pattern, string, flags=0):
    """Compile `pattern` with `flags` and return its .search(string)."""
    compiled = compile(pattern, flags)
    return compiled.search(string)