Example #1
0
 def test_range_ignore(self):
     """Rewrite the single RANGE opcode to RANGE_IGNORE and check matching.

     The pattern is compiled with re.I; the opcode list is then patched by
     hand before running the matcher on the first character of the range.
     """
     from rpython.rlib.unicodedata import unicodedb
     rsre_char.set_unicode_db(unicodedb)

     compiled = get_code(u"[\U00010428-\U0001044f]", re.I)
     # 27 is OPCODE_RANGE: exactly one occurrence is expected.
     assert compiled.pattern.count(27) == 1
     range_pos = compiled.pattern.index(27)
     # 32 is OPCODE_RANGE_IGNORE: swap it in at the same position.
     compiled.pattern[range_pos] = 32
     assert rsre_core.match(compiled, u"\U00010428")
Example #2
0
 def test_range_ignore(self):
     """Rewrite the single RANGE opcode to RANGE_IGNORE and check matching.

     Here the object returned by get_code() is itself indexed and searched
     as the opcode sequence.
     """
     from rpython.rlib.unicodedata import unicodedb
     rsre_char.set_unicode_db(unicodedb)

     opcodes = get_code(u"[\U00010428-\U0001044f]", re.I)
     # 27 is OPCODE_RANGE: exactly one occurrence is expected.
     assert opcodes.count(27) == 1
     range_pos = opcodes.index(27)
     # 32 is OPCODE_RANGE_IGNORE: swap it in at the same position.
     opcodes[range_pos] = 32
     assert rsre_core.match(opcodes, u"\U00010428")
Example #3
0
def test_getupper_getlower_unicode_ascii_shortcut():
    """Call getlower/getupper on code points 0..127 with no unicode db set.

    The test passes as long as none of the calls raises; the unicode
    database is put back in the ``finally`` clause whatever happens.
    """
    from rpython.rlib.unicodedata import unicodedb
    try:
        # Remove the database: the calls below must not need it.
        rsre_char.set_unicode_db(None)
        for char_ord in range(128):
            rsre_char.getlower(char_ord, SRE_FLAG_UNICODE)
            rsre_char.getupper(char_ord, SRE_FLAG_UNICODE)
    finally:
        rsre_char.set_unicode_db(unicodedb)
Example #4
0
from rpython.rlib.rsre import rsre_core, rsre_utf8
from rpython.rlib.rsre.rsre_char import CODESIZE, MAXREPEAT, getlower, set_unicode_db


@unwrap_spec(char_ord=int, flags=int)
def w_getlower(space, char_ord, flags):
    """Return getlower(char_ord, flags) wrapped as an integer object."""
    lowered = getlower(char_ord, flags)
    return space.newint(lowered)


def w_getcodesize(space):
    """Return the CODESIZE constant wrapped as an integer object."""
    w_codesize = space.newint(CODESIZE)
    return w_codesize


# Use the same version of unicodedb as the standard objspace.
# This runs at import time: the `unicodedb` attribute of
# pypy.objspace.std.unicodeobject is handed to set_unicode_db().
import pypy.objspace.std.unicodeobject
set_unicode_db(pypy.objspace.std.unicodeobject.unicodedb)

# ____________________________________________________________
#


class UnicodeAsciiMatchContext(rsre_core.StrMatchContext):
    """Marker subclass of StrMatchContext.

    It adds no behaviour; it only records that the context originates
    from a W_UnicodeObject.
    """


def slice_w(space, ctx, start, end, w_default):
    # 'start' and 'end' are byte positions
    if ctx.ZERO <= start <= end:
        if isinstance(ctx, rsre_core.BufMatchContext):
            return space.newbytes(ctx._buffer.getslice(start, 1, end - start))
Example #5
0
from rpython.rlib.rsre import rsre_core
from rpython.rlib.rsre.rsre_char import CODESIZE, MAXREPEAT, getlower, set_unicode_db


@unwrap_spec(char_ord=int, flags=int)
def w_getlower(space, char_ord, flags):
    """Return getlower(char_ord, flags) wrapped by the object space."""
    lowered = getlower(char_ord, flags)
    return space.wrap(lowered)


def w_getcodesize(space):
    """Return the CODESIZE constant wrapped by the object space."""
    w_codesize = space.wrap(CODESIZE)
    return w_codesize

# Use the same version of unicodedb as the standard objspace.
# This runs at import time: the `unicodedb` attribute of
# pypy.objspace.std.unicodeobject is handed to set_unicode_db().
import pypy.objspace.std.unicodeobject
set_unicode_db(pypy.objspace.std.unicodeobject.unicodedb)

# ____________________________________________________________
#


def slice_w(space, ctx, start, end, w_default):
    if 0 <= start <= end:
        if isinstance(ctx, rsre_core.BufMatchContext):
            return space.newbytes(ctx._buffer.getslice(start, end, 1,
                                                        end-start))
        if isinstance(ctx, rsre_core.StrMatchContext):
            return space.newbytes(ctx._string[start:end])
        elif isinstance(ctx, rsre_core.UnicodeMatchContext):
            return space.newunicode(ctx._unicodestr[start:end])
        else:
Example #6
0
def setup_module(mod):
    """Install the RPython unicode database into rsre_char.

    ``mod`` is accepted but not used.  NOTE(review): presumably called by
    the test runner once per module -- confirm.
    """
    from rpython.rlib.unicodedata import unicodedb as database
    rsre_char.set_unicode_db(database)
Example #7
0
def setup_module(mod):
    """Install the RPython unicode database into rsre_char.

    ``mod`` is accepted but not used.  NOTE(review): presumably called by
    the test runner once per module -- confirm.
    """
    from rpython.rlib.unicodedata import unicodedb as database
    rsre_char.set_unicode_db(database)
Example #8
0
"""
This is not used in a PyPy translation, but it can be used
in RPython code.  It exports the same interface as the
Python 're' module.  You can call the functions at the start
of the module (except the ones with @not_rpython for now).
They must be called with a *constant* pattern string.
"""
import re, sys
from rpython.rlib.rsre import rsre_core, rsre_char
from rpython.rlib.rsre.rpy import get_code as _get_code
from rpython.rlib.unicodedata import unicodedb
from rpython.rlib.objectmodel import specialize, we_are_translated
from rpython.rlib.objectmodel import not_rpython
# Import-time side effect: register rpython.rlib.unicodedata's unicodedb
# with rsre_char.
rsre_char.set_unicode_db(unicodedb)


# Flag constants taken straight from the standard 're' module, each bound
# to both its one-letter and its long name.
I = IGNORECASE = re.I   # ignore case
L = LOCALE     = re.L   # assume current 8-bit locale
U = UNICODE    = re.U   # assume unicode locale
M = MULTILINE  = re.M   # make anchors look for newline
S = DOTALL     = re.S   # make dot match newline
X = VERBOSE    = re.X   # ignore whitespace and comments


@specialize.call_location()
def match(pattern, string, flags=0):
    """Compile `pattern` with `flags` and return its match() on `string`."""
    compiled = compile(pattern, flags)
    return compiled.match(string)

@specialize.call_location()
def search(pattern, string, flags=0):
    """Compile `pattern` with `flags` and return its search() on `string`."""
    compiled = compile(pattern, flags)
    return compiled.search(string)
Example #9
0
"""
This is not used in a PyPy translation, but it can be used
in RPython code.  It exports the same interface as the
Python 're' module.  You can call the functions at the start
of the module (except the ones with NOT_RPYTHON for now).
They must be called with a *constant* pattern string.
"""
import re, sys
from rpython.rlib.rsre import rsre_core, rsre_char
from rpython.rlib.rsre.rpy import get_code as _get_code
from rpython.rlib.unicodedata import unicodedb
from rpython.rlib.objectmodel import specialize, we_are_translated
# Import-time side effect: register rpython.rlib.unicodedata's unicodedb
# with rsre_char.
rsre_char.set_unicode_db(unicodedb)

# Flag constants taken straight from the standard 're' module, each bound
# to both its one-letter and its long name.
I = IGNORECASE = re.I  # ignore case
L = LOCALE = re.L  # assume current 8-bit locale
U = UNICODE = re.U  # assume unicode locale
M = MULTILINE = re.M  # make anchors look for newline
S = DOTALL = re.S  # make dot match newline
X = VERBOSE = re.X  # ignore whitespace and comments


@specialize.call_location()
def match(pattern, string, flags=0):
    """Compile `pattern` with `flags` and return its match() on `string`."""
    compiled = compile(pattern, flags)
    return compiled.match(string)


@specialize.call_location()
def search(pattern, string, flags=0):
    """Compile `pattern` with `flags` and return its search() on `string`."""
    compiled = compile(pattern, flags)
    return compiled.search(string)