Exemple #1
0
 def test_bug_926075(self):
     try:
         unicode
     except NameError:
         return # no problem if we have no unicode
     assert (re.compile('bug_926075') is not
                  re.compile(eval("u'bug_926075'")))
Exemple #2
0
 def test_empty_array(self):
     # SF buf 1647541
     import array
     for typecode in 'cbBuhHiIlLfd':
         a = array.array(typecode)
         assert re.compile("bla").match(a) == None
         assert re.compile("").match(a).groups() == ()
Exemple #3
0
 def test_bug_926075(self):
     try:
         unicode
     except NameError:
         return  # no problem if we have no unicode
     assert (re.compile('bug_926075')
             is not re.compile(eval("u'bug_926075'")))
Exemple #4
0
def _unescape_string(body):
    """Expand the quote, newline and backslash escapes of ``body`` in one pass.

    Scanning left to right keeps an escaped backslash that is followed by
    ``n`` from being mistaken for a newline escape, which is what happens
    when the three substitutions are applied one after another.
    Unrecognised escapes and a trailing lone backslash are copied unchanged.
    """
    out = []
    i = 0
    size = len(body)
    while i < size:
        ch = body[i]
        if ch == u'\\' and i + 1 < size:
            nxt = body[i + 1]
            if nxt == u'"':
                out.append(u'"')
                i += 2
                continue
            elif nxt == u'n':
                out.append(u"\n")
                i += 2
                continue
            elif nxt == u'\\':
                out.append(u"\\")
                i += 2
                continue
        out.append(ch)
        i += 1
    return u"".join(out)


def read_atom(reader):
    """Take the next token from ``reader`` and build the matching value.

    Integer tokens become ``MalInt``; tokens starting with a double quote
    become ``MalStr`` (with escapes expanded); tokens starting with ``:``
    become keywords; ``nil``/``true``/``false`` map to the ``types``
    constants; anything else becomes a ``MalSym``.
    """
    # The pattern is left as a plain string when IS_RPYTHON is set and is
    # compiled up front otherwise.
    if IS_RPYTHON:
        int_re = '-?[0-9]+$'
    else:
        int_re = re.compile('-?[0-9]+$')
    token = reader.next()
    if re.match(int_re, token):
        return MalInt(int(token))
    elif token[0] == '"':
        # Index of the last character, i.e. the closing quote.
        end = len(token) - 1
        if end < 2:
            # Nothing between the quotes.
            return MalStr(u"")
        else:
            return MalStr(_unescape_string(unicode(token[1:end])))
    elif token[0] == ':':
        return _keywordu(unicode(token[1:]))
    elif token == "nil":
        return types.nil
    elif token == "true":
        return types.true
    elif token == "false":
        return types.false
    else:
        return MalSym(unicode(token))
Exemple #5
0
 def test_empty_array(self):
     # SF buf 1647541
     import array
     for typecode in 'cbBuhHiIlLfd':
         a = array.array(typecode)
         assert re.compile("bla").match(a) == None
         assert re.compile("").match(a).groups() == ()
Exemple #6
0
def _unescape_string(body):
    """Expand the quote, newline and backslash escapes of ``body`` in one pass.

    A single left-to-right scan is used so that an escaped backslash
    followed by ``n`` is not decoded as a newline, as it would be if the
    substitutions were applied one after another.  Unrecognised escapes
    and a trailing lone backslash are copied unchanged.
    """
    out = []
    i = 0
    size = len(body)
    while i < size:
        ch = body[i]
        if ch == u'\\' and i + 1 < size:
            nxt = body[i + 1]
            if nxt == u'"':
                out.append(u'"')
                i += 2
                continue
            elif nxt == u'n':
                out.append(u"\n")
                i += 2
                continue
            elif nxt == u'\\':
                out.append(u"\\")
                i += 2
                continue
        out.append(ch)
        i += 1
    return u"".join(out)


def read_atom(reader):
    """Take the next token from ``reader`` and build the matching value.

    Integer tokens become ``MalInt``; tokens starting with a double quote
    become ``MalStr`` (with escapes expanded); tokens starting with ``:``
    become keywords; ``nil``/``true``/``false`` map to the ``types``
    constants; anything else becomes a ``MalSym``.
    """
    # The pattern is left as a plain string when IS_RPYTHON is set and is
    # compiled up front otherwise.
    if IS_RPYTHON:
        int_re = '-?[0-9]+$'
    else:
        int_re = re.compile('-?[0-9]+$')
    token = reader.next()
    if re.match(int_re, token):
        return MalInt(int(token))
    elif token[0] == '"':
        # Index of the last character, i.e. the closing quote.
        end = len(token) - 1
        if end < 2:
            # Nothing between the quotes.
            return MalStr(u"")
        else:
            return MalStr(_unescape_string(unicode(token[1:end])))
    elif token[0] == ':':
        return _keywordu(unicode(token[1:]))
    elif token == "nil":
        return types.nil
    elif token == "true":
        return types.true
    elif token == "false":
        return types.false
    else:
        return MalSym(unicode(token))
Exemple #7
0
def tokenize(str):
    """Split source text into a list of token strings, dropping comments.

    Leading whitespace and commas are skipped before each token.  A token
    is ``~@``, one of the special characters, a double-quoted string (the
    closing quote is optional in this pattern), a ``;`` comment running to
    the end of the line, or a run of other characters.  Comment tokens are
    filtered out of the result.
    """
    # Raw string: the regex escapes reach the regex engine verbatim instead
    # of depending on Python passing unknown string escapes through.
    re_str = r'''[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"?|;.*|[^\s\[\]{}()'"`@,;]+)'''
    if IS_RPYTHON:
        tok_re = re_str
    else:
        tok_re = re.compile(re_str)
    return [t for t in re.findall(tok_re, str) if t[0] != ';']
Exemple #8
0
 def test_bug_931848(self):
     try:
         unicode
     except NameError:
         pass
     pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
     assert re.compile(pattern).split("a.b.c") == (['a', 'b', 'c'])
Exemple #9
0
 def is_valid_varname(self, name):
     """Return True when ``name`` matches ``^[a-zA-Z_][a-zA-Z0-9_]*$``."""
     from rpython.rlib.rsre.rsre_re import compile
     matched = compile("^[a-zA-Z_][a-zA-Z0-9_]*$").match(name)
     return matched is not None
Exemple #10
0
def tokenize(str):
    """Split source text into a list of token strings, dropping comments.

    Leading whitespace and commas are skipped before each token.  A token
    is ``~@``, one of the special characters, a complete double-quoted
    string, a ``;`` comment running to the end of the line, or a run of
    other characters.  Comment tokens are filtered out of the result.
    """
    # Raw string: the regex escapes reach the regex engine verbatim instead
    # of depending on Python passing unknown string escapes through.
    re_str = r'''[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"|;.*|[^\s\[\]{}()'"`@,;]+)'''
    if IS_RPYTHON:
        tok_re = re_str
    else:
        tok_re = re.compile(re_str)
    return [t for t in re.findall(tok_re, str) if t[0] != ';']
Exemple #11
0
 def test_bug_931848(self):
     try:
         unicode
     except NameError:
         pass
     pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
     assert re.compile(pattern).split("a.b.c") == (
                      ['a','b','c'])
Exemple #12
0
 def test_bug_764548(self):
     # bug 764548, re.compile() barfs on str/unicode subclasses
     try:
         unicode
     except NameError:
         return  # no problem if we have no unicode
     class my_unicode(unicode): pass
     pat = re.compile(my_unicode("abc"))
     assert pat.match("xyz") == None
Exemple #13
0
    def test_bug_581080(self):
        """finditer() yields the single whitespace match and is then exhausted."""
        # Named ``matches`` so the ``iter`` builtin is not shadowed; the
        # next() builtin replaces the Python-2-only ``.next()`` method.
        matches = re.finditer(r"\s", "a b")
        assert next(matches).span() == (1, 2)
        py.test.raises(StopIteration, next, matches)

        if 0:  # XXX disabled: scanner() variant of the same check
            scanner = re.compile(r"\s").scanner("a b")
            assert scanner.search().span() == (1, 2)
            assert scanner.search() is None
Exemple #14
0
 def pickle_test(self, pickle):
     """Round-trip a compiled pattern through ``pickle`` and compare instance dicts."""
     original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
     restored = pickle.loads(pickle.dumps(original))
     # The attribute dictionaries are compared instead of relying on object
     # identity for _sre.py: some Python builds do not seem to preserve
     # identity in all cases (observed on a UCS-4 build of 2.4.1).
     assert original.__dict__ == restored.__dict__
Exemple #15
0
 def pickle_test(self, pickle):
     # Serialise a compiled pattern with the given pickle module, load it
     # back, and require both objects to carry equal attribute dicts.
     before = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
     payload = pickle.dumps(before)
     after = pickle.loads(payload)
     # Object identity is not used for _sre.py because some Python builds
     # do not seem to preserve it in all cases (observed on a UCS-4 build
     # of 2.4.1).
     assert before.__dict__ == after.__dict__
Exemple #16
0
    def test_bug_581080(self):
        # finditer() over "a b" must produce exactly one whitespace match.
        # ``found`` avoids shadowing the ``iter`` builtin, and the next()
        # builtin replaces the Python-2-only ``.next()`` method.
        found = re.finditer(r"\s", "a b")
        assert next(found).span() == (1, 2)
        py.test.raises(StopIteration, next, found)

        if 0:    # XXX disabled: scanner() variant of the same check
            scanner = re.compile(r"\s").scanner("a b")
            assert scanner.search().span() == (1, 2)
            assert scanner.search() is None
Exemple #17
0
    def test_re_escape(self):
        p=""
        for i in range(0, 256):
            p = p + chr(i)
            assert re.match(re.escape(chr(i)), chr(i)) is not None
            assert re.match(re.escape(chr(i)), chr(i)).span() == (0,1)

        pat=re.compile(re.escape(p))
        assert pat.match(p) is not None
        assert pat.match(p).span() == (0,256)
Exemple #18
0
    def test_re_escape(self):
        p = ""
        for i in range(0, 256):
            p = p + chr(i)
            assert re.match(re.escape(chr(i)), chr(i)) is not None
            assert re.match(re.escape(chr(i)), chr(i)).span() == (0, 1)

        pat = re.compile(re.escape(p))
        assert pat.match(p) is not None
        assert pat.match(p).span() == (0, 256)
Exemple #19
0
    def test_bug_764548(self):
        # bug 764548, re.compile() barfs on str/unicode subclasses
        try:
            unicode
        except NameError:
            return  # no problem if we have no unicode

        class my_unicode(unicode):
            pass

        pat = re.compile(my_unicode("abc"))
        assert pat.match("xyz") == None
Exemple #20
0
    def test_inline_flags(self):
        # Bug #1700
        upper_char = unichr(0x1ea0)  # Latin Capital Letter A with Dot Bellow
        lower_char = unichr(0x1ea1)  # Latin Small Letter A with Dot Bellow

        p = re.compile(upper_char, re.I | re.U)
        q = p.match(lower_char)
        assert q != None

        p = re.compile(lower_char, re.I | re.U)
        q = p.match(upper_char)
        assert q != None

        p = re.compile('(?i)' + upper_char, re.U)
        q = p.match(lower_char)
        assert q != None

        p = re.compile('(?i)' + lower_char, re.U)
        q = p.match(upper_char)
        assert q != None

        p = re.compile('(?iu)' + upper_char)
        q = p.match(lower_char)
        assert q != None

        p = re.compile('(?iu)' + lower_char)
        q = p.match(upper_char)
        assert q != None
Exemple #21
0
    def test_inline_flags(self):
        # Bug #1700
        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow

        p = re.compile(upper_char, re.I | re.U)
        q = p.match(lower_char)
        assert q != None

        p = re.compile(lower_char, re.I | re.U)
        q = p.match(upper_char)
        assert q != None

        p = re.compile('(?i)' + upper_char, re.U)
        q = p.match(lower_char)
        assert q != None

        p = re.compile('(?i)' + lower_char, re.U)
        q = p.match(upper_char)
        assert q != None

        p = re.compile('(?iu)' + upper_char)
        q = p.match(lower_char)
        assert q != None

        p = re.compile('(?iu)' + lower_char)
        q = p.match(upper_char)
        assert q != None
Exemple #22
0
    def test_re_match(self):
        assert re.match('a', 'a').groups() == ()
        assert re.match('(a)', 'a').groups() == ('a',)
        assert re.match(r'(a)', 'a').group(0) == 'a'
        assert re.match(r'(a)', 'a').group(1) == 'a'
        #assert re.match(r'(a)', 'a').group(1, 1) == ('a', 'a')

        pat = re.compile('((a)|(b))(c)?')
        assert pat.match('a').groups() == ('a', 'a', None, None)
        assert pat.match('b').groups() == ('b', None, 'b', None)
        assert pat.match('ac').groups() == ('a', 'a', None, 'c')
        assert pat.match('bc').groups() == ('b', None, 'b', 'c')
        assert pat.match('bc').groups("") == ('b', "", 'b', 'c')

        # A single group
        m = re.match('(a)', 'a')
        assert m.group(0) == 'a'
        assert m.group(0) == 'a'
        assert m.group(1) == 'a'
        #assert m.group(1, 1) == ('a', 'a')

        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
Exemple #23
0
    def test_re_match(self):
        assert re.match('a', 'a').groups() == ()
        assert re.match('(a)', 'a').groups() == ('a', )
        assert re.match(r'(a)', 'a').group(0) == 'a'
        assert re.match(r'(a)', 'a').group(1) == 'a'
        #assert re.match(r'(a)', 'a').group(1, 1) == ('a', 'a')

        pat = re.compile('((a)|(b))(c)?')
        assert pat.match('a').groups() == ('a', 'a', None, None)
        assert pat.match('b').groups() == ('b', None, 'b', None)
        assert pat.match('ac').groups() == ('a', 'a', None, 'c')
        assert pat.match('bc').groups() == ('b', None, 'b', 'c')
        assert pat.match('bc').groups("") == ('b', "", 'b', 'c')

        # A single group
        m = re.match('(a)', 'a')
        assert m.group(0) == 'a'
        assert m.group(0) == 'a'
        assert m.group(1) == 'a'
        #assert m.group(1, 1) == ('a', 'a')

        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
Exemple #24
0
    def test_re_groupref_exists(self):
        assert re.match('^(\()?([^()]+)(?(1)\))$',
                        '(a)').groups() == (('(', 'a'))
        assert re.match('^(\()?([^()]+)(?(1)\))$',
                        'a').groups() == ((None, 'a'))
        assert re.match('^(\()?([^()]+)(?(1)\))$', 'a)') == None
        assert re.match('^(\()?([^()]+)(?(1)\))$', '(a') == None
        assert re.match('^(?:(a)|c)((?(1)b|d))$',
                        'ab').groups() == (('a', 'b'))
        assert re.match('^(?:(a)|c)((?(1)b|d))$',
                        'cd').groups() == ((None, 'd'))
        assert re.match('^(?:(a)|c)((?(1)|d))$',
                        'cd').groups() == ((None, 'd'))
        assert re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups() == (('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        assert p.match('abc').groups() == (('a', 'b', 'c'))
        assert p.match('ad').groups() == (('a', None, 'd'))
        assert p.match('abd') == None
        assert p.match('ac') == None
Exemple #25
0
    def test_re_groupref_exists(self):
        assert re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups() == (
                         ('(', 'a'))
        assert re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups() == (
                         (None, 'a'))
        assert re.match('^(\()?([^()]+)(?(1)\))$', 'a)') == None
        assert re.match('^(\()?([^()]+)(?(1)\))$', '(a') == None
        assert re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups() == (
                         ('a', 'b'))
        assert re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups() == (
                         (None, 'd'))
        assert re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups() == (
                         (None, 'd'))
        assert re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups() == (
                         ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        assert p.match('abc').groups() == (
                         ('a', 'b', 'c'))
        assert p.match('ad').groups() == (
                         ('a', None, 'd'))
        assert p.match('abd') == None
        assert p.match('ac') == None
Exemple #26
0
from rpython.rlib.rsre.rsre_re import compile

# Numeric literal: an optional sign; then digits with an optional fractional
# part, the word "Infinity", or a fraction that starts with a dot; then an
# optional exponent whose digits may be empty.  Group 1 captures the part
# between the sign and the exponent.
num_lit_exp = r'(?:[+-]?((?:(?:\d+)(?:\.\d*)?)|Infinity|(?:\.[0-9]+))(?:[eE][\+\-]?[0-9]*)?)'
# Unanchored compiled form of the numeric-literal pattern.
num_lit_rexp = compile(num_lit_exp)
# Same pattern wrapped in ^...$ anchors.
num_rexp = compile(r'^%s$' % num_lit_exp)
# Hexadecimal literal: 0x/0X prefix, hex digits captured in group 1.
hex_rexp = compile(r'^0[xX]([\dA-Fa-f]+)$')
# Octal literal: a leading 0, digits 0-7 captured in group 1.
oct_rexp = compile(r'^0([0-7]+)$')
Exemple #27
0
from rpython.rlib.rsre.rsre_re import compile


# a dollar sign followed by an identifier (letter or underscore first)
VARIABLENAME = r"\$[a-zA-Z_][a-zA-Z0-9_]*"

# array index regex matching the index and the brackets
ARRAYINDEX = r"\[(?:[0-9]+|%s)\]" % VARIABLENAME

# match variables and array access
VARIABLE = r"(%s(?:%s)*)" % (VARIABLENAME, ARRAYINDEX)
# compiled form of VARIABLE with an optional "{" before and "}" after it
CURLYVARIABLE = compile(r"{?" + VARIABLE + "}?")
Exemple #28
0
 def _compile(self, re):
     if self.use_rsre:
         from rpython.rlib.rsre.rsre_re import compile, M, DOTALL, IGNORECASE
     else:
         from re import compile, M, DOTALL, IGNORECASE
     return compile(re, IGNORECASE | M | DOTALL)
Exemple #29
0
    pattern = [n] * n
    string = chr(n) * n
    rsre_core.search(pattern, string)
    #
    unicodestr = unichr(n) * n
    ctx = rsre_core.UnicodeMatchContext(pattern, unicodestr,
                                        0, len(unicodestr), 0)
    rsre_core.search_context(ctx)
    #
    return 0


def test_gengraph():
    """Run gengraph() on ``main`` with an int argument and unpack its three results."""
    outcome = gengraph(main, [int])
    t, typer, graph = outcome

# Pattern used by test_match: "a" or "b" (captured) followed by five "a"s.
m = compile("(a|b)aaaaa")

def test_match():
    """Interpret a function that matches with the module-level pattern ``m``.

    A truthy argument selects a matching subject (result 1); a falsy one
    selects a non-matching subject (result 3).
    """
    def f(i):
        s = "aaaaaa" if i else "caaaaa"
        g = m.match(s)
        if g is not None:
            return int("aaaaaa" == g.group(0))
        return 3
    assert interpret(f, [3]) == 1
    assert interpret(f, [0]) == 3

def test_translates():
Exemple #30
0
    port.write(str)
    return return_void(env, cont)

# Maps each two-character format directive to its fixed replacement text.
# A value of None marks a directive whose text is taken from the next
# argument instead (see how format() handles ``val is None``).
format_dict = {
    '~n': '\n',
    '~%': '\n',
    '~a': None,
    '~A': None,
    '~e': None,
    '~E': None,
    '~s': None,
    '~S': None,
    '~v': None,
    '~V': None,
}
# Alternation of all directive strings; the keys are used as regex source
# without escaping.
format_regex = re.compile("|".join(format_dict.keys()))

@jit.unroll_safe
def format(form, v):
    # Walk the directives found in ``form.value``, copying the literal text
    # between them into a StringBuilder and substituting each directive.
    # ``v`` is indexed and sliced as a sequence of arguments; each directive
    # mapped to None consumes its first element.
    # NOTE(review): the visible part of this function ends inside the loop
    # (no trailing text is appended and nothing is returned) -- the rest is
    # presumably outside this excerpt; confirm against the full file.
    text = form.value
    result = StringBuilder()
    pos = 0  # index in ``text`` just past the previously handled directive
    for match in format_regex.finditer(text):
        match_start = match.start()
        # presumably a non-negativity hint for the RPython annotator -- TODO confirm
        assert match_start >= 0
        # Literal text between the previous directive and this one.
        result.append_slice(text, pos, match_start)
        val = format_dict[match.group()]
        if val is None:
            # No fixed text: use the next argument's tostring() and drop it.
            val, v = v[0].tostring(), v[1:]
        result.append(val)
        pos = match.end()
Exemple #31
0
 def test_bug_612074(self):
     pat=u"["+re.escape(u"\u2039")+u"]"
     assert re.compile(pat) and 1 == 1
Exemple #32
0
 def test_flags(self):
     for flag in [re.I, re.M, re.X, re.S, re.L]:
         assert re.compile('^pattern$', flag) != None
Exemple #33
0
 def test_flags(self):
     for flag in [re.I, re.M, re.X, re.S, re.L]:
         assert re.compile('^pattern$', flag) != None
Exemple #34
0
 def test_bug_612074(self):
     pat = u"[" + re.escape(u"\u2039") + u"]"
     assert re.compile(pat) and 1 == 1
Exemple #35
0
 def __init__(self):
     """Compile every RULES pattern with M | IGNORECASE and pair it with its token name."""
     flags = M | IGNORECASE
     self.rules = []
     for pattern, name in RULES:
         matcher = compile(pattern, flags)
         self.rules.append((matcher, name))
Exemple #36
0
import rpython.rlib.rsre.rsre_re as re
from numbers import wrap_int, wrap_bigint, int_zero, bigint_zero
from fn import AFn, wrap_fn
from cons import create_from_list as create_list
from rpython.rlib.rbigint import rbigint
from rpython.rlib.rarithmetic import LONG_BIT

# Integer literal syntax.  Capture groups:
#   1 sign, 2 a lone zero, 3 decimal digits, 4 hex digits (after 0x/0X),
#   5 octal digits (after a leading 0), 6 radix and 7 digits of a
#   <radix>r<digits> literal, 8 the optional trailing "N".
# The last alternative, 0[0-9]+, matches without capturing anything.
int_pat = re.compile(
    "^([-+]?)(?:(0)|([1-9][0-9]*)|0[xX]([0-9A-Fa-f]+)|0([0-7]+)|([1-9][0-9]?)[rR]([0-9A-Za-z]+)|0[0-9]+)(N)?$"
)


def is_whitespace(ch):
    """Return whether ``ch`` occurs in the newline/CR/space/comma/tab string."""
    blanks = '\n\r ,\t'
    return ch in blanks


def is_digit(ch):
    """Return whether ``ch`` occurs in the string of ASCII decimal digits."""
    digits = '0123456789'
    return ch in digits


def read(r, eof_is_error, eof_value, is_recursive):
    """Read from ``r``, skipping leading whitespace and handling end of input.

    When ``r.read()`` yields "" the function raises RuntimeError if
    ``eof_is_error`` is true and returns ``eof_value`` otherwise.
    ``is_recursive`` is not used in the visible part of the body.

    NOTE(review): only the start of this function is visible in this
    excerpt; the handling of non-empty characters is presumably further
    down -- confirm against the full file.
    """
    while True:
        ch = r.read()

        # NOTE(review): is_whitespace("") is True because "" is a substring
        # of every string, so if r.read() keeps returning "" at end of input
        # this loop would never finish -- confirm how the reader signals EOF.
        while is_whitespace(ch):
            ch = r.read()

        if ch == "":
            if eof_is_error:
                raise RuntimeError("EOF while reading")
            return eof_value
Exemple #37
0
from rpython.rlib.rsre.rsre_re import compile


# a dollar sign followed by an identifier (letter or underscore first)
VARIABLENAME = r"\$[a-zA-Z_][a-zA-Z0-9_]*"

# array index regex matching the index and the brackets
ARRAYINDEX = r"\[(?:[0-9]+|%s)\]" % VARIABLENAME

# match variables and array access
VARIABLE = r"(%s(?:%s)*)" % (VARIABLENAME, ARRAYINDEX)
# compiled form of VARIABLE with an optional "{" before and "}" after it
CURLYVARIABLE = compile(r"{?" + VARIABLE + "}?")

# NOTE: from here on ARRAYINDEX is rebound from the pattern string above to
# a compiled pattern whose group 1 captures the index between the brackets.
ARRAYINDEX = compile(r"\[([0-9]+|%s)\]" % VARIABLENAME)
Exemple #38
0
 def __init__(self):
     # Build the (compiled pattern, token name) pairs from RULES; every
     # pattern is compiled with the M and IGNORECASE flags.
     self.rules = []
     for source, token_name in RULES:
         compiled = compile(source, M | IGNORECASE)
         entry = (compiled, token_name)
         self.rules.append(entry)
Exemple #39
0
            u"'": QuoteReader(),
            u":": KeywordReader(),
            u"\"": LiteralStringReader(),
            u"\\": LiteralCharacterReader(),
            u"@": DerefReader(),
            u"`": SyntaxQuoteReader(),
            u"~": UnquoteReader(),
            u"^": MetaReader(),
            u"#": DispatchReader(),
            u";": LineCommentReader()
}

# inspired by https://github.com/clojure/tools.reader/blob/9ee11ed/src/main/clojure/clojure/tools/reader/impl/commons.clj#L45
#                           sign      hex                    oct      radix                           decimal
#                           1         2      3               4        5                 6             7
int_matcher = re.compile(u'^([-+]?)(?:(0[xX])([0-9a-fA-F]+)|0([0-7]+)|([1-9][0-9]?)[rR]([0-9a-zA-Z]+)|([0-9]*))$')

# Float: group 1 is the whole literal, group 2 the optional fractional part
# (including the dot), group 3 the optional exponent.
float_matcher = re.compile(u'^([-+]?[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$')
# Ratio: group 1 is the optionally signed numerator, group 2 the denominator.
ratio_matcher = re.compile(u'^([-+]?[0-9]+)/([0-9]+)$')

def parse_int(m):
    # Interpret a match object by its numbered groups: group 1 is the sign,
    # group 7 a decimal digit string, group 2 a hexadecimal prefix.
    # NOTE(review): the body is cut off in this excerpt right after the hex
    # branch sets the radix -- the remaining branches and the return value
    # are not visible; confirm against the full file.
    sign = 1
    if m.group(1) == u'-':
        sign = -1

    # Base 10 unless one of the branches below selects another radix.
    radix = 10

    if m.group(7):
        num = m.group(7)
    elif m.group(2):
        radix = 16
Exemple #40
0
 def __init__(self):
     """Compile every RULES pattern with M | DOTALL and pair it with its rule type."""
     flags = M | DOTALL
     self.rules = []
     for pattern, rule_type in RULES:
         self.rules.append((compile(pattern, flags), rule_type))
Exemple #41
0
import rpython.rlib.rsre.rsre_re as re
from numbers import wrap_int, wrap_bigint, int_zero, bigint_zero
from fn import AFn, wrap_fn
from cons import create_from_list as create_list
from rpython.rlib.rbigint import rbigint
from rpython.rlib.rarithmetic import LONG_BIT

# Integer literal syntax.  Capture groups:
#   1 sign, 2 a lone zero, 3 decimal digits, 4 hex digits (after 0x/0X),
#   5 octal digits (after a leading 0), 6 radix and 7 digits of a
#   <radix>r<digits> literal, 8 the optional trailing "N".
# The last alternative, 0[0-9]+, matches without capturing anything.
int_pat = re.compile("^([-+]?)(?:(0)|([1-9][0-9]*)|0[xX]([0-9A-Fa-f]+)|0([0-7]+)|([1-9][0-9]?)[rR]([0-9A-Za-z]+)|0[0-9]+)(N)?$")

def is_whitespace(ch):
    # Membership test against newline, carriage return, space, comma and tab.
    separators = '\n\r ,\t'
    return ch in separators

def is_digit(ch):
    # Membership test against the ten ASCII decimal digits.
    decimal_digits = '0123456789'
    return ch in decimal_digits



def read(r, eof_is_error, eof_value, is_recursive):
    """Read from ``r``, skipping leading whitespace and handling end of input.

    When ``r.read()`` yields "" the function raises RuntimeError if
    ``eof_is_error`` is true and returns ``eof_value`` otherwise.  A digit
    is handed to ``read_number`` together with the reader.
    ``is_recursive`` is not used in the visible part of the body.

    NOTE(review): the function is cut off in this excerpt after the digit
    branch starts; what happens with ``n`` and with other characters is
    not visible -- confirm against the full file.
    """
    while True:
        ch = r.read()

        # NOTE(review): is_whitespace("") is True because "" is a substring
        # of every string, so if r.read() keeps returning "" at end of input
        # this loop would never finish -- confirm how the reader signals EOF.
        while is_whitespace(ch):
            ch = r.read()

        if ch == "":
            if eof_is_error:
                raise RuntimeError("EOF while reading")
            return eof_value

        if is_digit(ch):
            n = read_number(r, ch)
Exemple #42
0
 def _compile(self, re):
     if self.use_rsre:
         from rpython.rlib.rsre.rsre_re import compile, M, DOTALL, IGNORECASE
     else:
         from re import compile, M, DOTALL, IGNORECASE
     return compile(re, IGNORECASE | M | DOTALL)