Exemple #1
0
    def __init__(self):
        '''
        Set up the alphabet so that it runs from chr(0) to chr(maxunicode),
        passing the parent class a matcher for escaped characters and a
        matcher for the 's' / 'S' range codes.
        '''
        from lepl.matchers.core import Any
        from lepl.matchers.combine import Or
        last_char = chr(maxunicode)

        def mkhex(char, n):
            # `char` is matched and dropped; the text that remains is read
            # as a base-16 code point and turned into that character.
            # NOTE(review): [n,...] presumably means n joined repetitions -
            # confirm against the lepl repetition syntax.
            from lepl.matchers.derived import Drop
            marker = Drop(Any(char))
            digits = Any('0123456789abcdefABCDEF')[n,...]
            return (marker + digits) >> (lambda text: chr(int(text, 16)))

        def mkchr(char, members, invert=False):
            # Maps the literal `char` to a Character built from one (x, x)
            # interval per member, or from the inverse of those intervals.
            from lepl.matchers.core import Literal
            from lepl.matchers.derived import Map
            from lepl.regexp.core import Character
            pairs = lmap(lambda member: (member, member), members)
            if invert:
                # self.invert is only called when the mapped function runs,
                # which is after self has been created
                return Map(Literal(char),
                           lambda _: Character(self.invert(pairs), self))
            return Map(Literal(char), lambda _: Character(pairs, self))

        whitespace = mkchr('s', _WHITESPACE)
        non_whitespace = mkchr('S', _WHITESPACE, invert=True)
        classes = Or(whitespace, non_whitespace)

        # Any(ILLEGAL) | \x.. | \u.... | \U........, combined left to right
        escaped = Any(ILLEGAL)
        for letter, width in (('x', 2), ('u', 4), ('U', 8)):
            escaped = escaped | mkhex(letter, width)

        super(UnicodeAlphabet, self).__init__(chr(0), last_char,
                                              escaped=escaped, range=classes)
Exemple #2
0
    def test_inline_flags(self):
        # Bug #1700
        upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
        lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow

        p = self._re.compile(upper_char, self._re.I | self._re.U)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = self._re.compile(lower_char, self._re.I | self._re.U)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)

        p = self._re.compile('(?i)' + upper_char, self._re.U)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = self._re.compile('(?i)' + lower_char, self._re.U)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)

        p = self._re.compile('(?iu)' + upper_char)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = self._re.compile('(?iu)' + lower_char)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)
Exemple #3
0
    def test_re_escape(self):
        p=""
        self.assertEqual(self._re.escape(p), p)
        for i in range(0, 256):
            p = p + chr(i)
            self.assertEqual(self._re.match(self._re.escape(chr(i)), chr(i)) is not None,
                             True)
            self.assertEqual(self._re.match(self._re.escape(chr(i)), chr(i)).span(), (0,1))

        pat=self._re.compile(self._re.escape(p))
        self.assertEqual(pat.match(p) is not None, True)
        self.assertEqual(pat.match(p).span(), (0,256))
Exemple #4
0
    def test_basic_re_sub(self):
        # Exercises sub() with string and callable replacements, a count
        # limit, numbered and named group references, and escape handling in
        # replacement templates.
        self.assertEqual(self._re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
        self.assertEqual(self._re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                         '9.3 -3 24x100y')
        # with a fourth argument of 3 only the first three numbers change
        self.assertEqual(self._re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                         '9.3 -3 23x99y')

        # a callable's return value is used as-is; a template string has its
        # escapes interpreted
        self.assertEqual(self._re.sub('.', lambda m: r"\n", 'x'), '\\n')
        self.assertEqual(self._re.sub('.', r"\n", 'x'), '\n')

        s = r"\1\1"
        self.assertEqual(self._re.sub('(.)', s, 'x'), 'xx')
        self.assertEqual(self._re.sub('(.)', self._re.escape(s), 'x'), s)
        self.assertEqual(self._re.sub('(.)', lambda m: s, 'x'), s)

        # Raw literals: '\g' is not a Python string escape, so the text is
        # the same as before but no longer triggers invalid-escape warnings.
        self.assertEqual(self._re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
        self.assertEqual(self._re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
        self.assertEqual(self._re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
        self.assertEqual(self._re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

        self.assertEqual(self._re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
        # here the replacement already holds the control characters themselves
        self.assertEqual(self._re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
        self.assertEqual(self._re.sub('a', '\t\n\v\r\f\a', 'a'),
                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

        # raw literal for the same reason as the '\g' templates above
        self.assertEqual(self._re.sub(r'^\s*', 'X', 'test'), 'Xtest')
Exemple #5
0
 def __init__(self):
     '''
     Set up the alphabet so that it runs from chr(0) to chr(maxunicode),
     passing the parent class a matcher for escaped characters and a
     matcher for the 's' / 'S' range codes.
     '''
     from lepl.matchers.core import Any
     from lepl.matchers.combine import Or
     last_char = chr(maxunicode)

     def mkhex(char, n):
         # `char` is matched and dropped; the text that remains is read
         # as a base-16 code point and turned into that character.
         # NOTE(review): [n,...] presumably means n joined repetitions -
         # confirm against the lepl repetition syntax.
         from lepl.matchers.derived import Drop
         marker = Drop(Any(char))
         digits = Any('0123456789abcdefABCDEF')[n,...]
         return (marker + digits) >> (lambda text: chr(int(text, 16)))

     def mkchr(char, members, invert=False):
         # Maps the literal `char` to a Character built from one (x, x)
         # interval per member, or from the inverse of those intervals.
         from lepl.matchers.core import Literal
         from lepl.matchers.derived import Map
         from lepl.regexp.core import Character
         pairs = lmap(lambda member: (member, member), members)
         if invert:
             # self.invert is only called when the mapped function runs,
             # which is after self has been created
             return Map(Literal(char),
                        lambda _: Character(self.invert(pairs), self))
         return Map(Literal(char), lambda _: Character(pairs, self))

     whitespace = mkchr('s', _WHITESPACE)
     non_whitespace = mkchr('S', _WHITESPACE, invert=True)
     classes = Or(whitespace, non_whitespace)

     # Any(ILLEGAL) | \x.. | \u.... | \U........, combined left to right
     escaped = Any(ILLEGAL)
     for letter, width in (('x', 2), ('u', 4), ('U', 8)):
         escaped = escaped | mkhex(letter, width)

     super(UnicodeAlphabet, self).__init__(chr(0), last_char,
                                           escaped=escaped, range=classes)
Exemple #6
0
 def test_sre_character_class_literals(self):
     # Each code point is written as a three-digit octal escape inside a
     # character class - alone, followed by '0' and followed by '8' - and
     # must match the corresponding character.
     for i in [0, 8, 16, 32, 64, 127, 128, 255]:
         self.assertNotEqual(self._re.match(u(r"[\%03o]") % i, chr(i)), None)
         self.assertNotEqual(self._re.match(u(r"[\%03o0]") % i, chr(i)), None)
         self.assertNotEqual(self._re.match(u(r"[\%03o8]") % i, chr(i)), None)
         # the two-digit hex forms are only checked on Python 3
         # (original author's note: "i give up")
         if PYTHON3:
             self.assertNotEqual(self._re.match(u(r"[\x%02x]") % i, chr(i)), None)
             self.assertNotEqual(self._re.match(u(r"[\x%02x0]") % i, chr(i)), None)
             self.assertNotEqual(self._re.match(u(r"[\x%02xz]") % i, chr(i)), None)
     # Raw literal: '\9' is not a Python string escape, so the pattern text
     # is unchanged but no longer triggers an invalid-escape warning.
     self.assertRaises(self._re.error, self._re.match, u(r"[\911]"), "")
Exemple #7
0
 def test_sre_character_literals(self):
     # Each code point is written as a three-digit octal escape - alone,
     # followed by '0' and followed by '8' - and must match the
     # corresponding character (plus the trailing literal, if any).
     for i in [0, 8, 16, 32, 64, 127, 128, 255]:
         self.assertNotEqual(self._re.match(u(r"\%03o") % i, chr(i)), None)
         self.assertNotEqual(self._re.match(u(r"\%03o0") % i, chr(i)+"0"), None)
         self.assertNotEqual(self._re.match(u(r"\%03o8") % i, chr(i)+"8"), None)
         # the two-digit hex forms are only checked on Python 3
         if PYTHON3:
             self.assertNotEqual(self._re.match(u(r"\x%02x") % i, chr(i)), None)
             self.assertNotEqual(self._re.match(u(r"\x%02x0") % i, chr(i)+"0"), None)
             self.assertNotEqual(self._re.match(u(r"\x%02xz") % i, chr(i)+"z"), None)
     # Raw literal: '\9' is not a Python string escape, so the pattern text
     # is unchanged but no longer triggers an invalid-escape warning.
     self.assertRaises(self._re.error, self._re.match, r"\911", "")
Exemple #8
0
 def after(self, char):
     '''
     Return the character whose code point is one greater than that of
     char.  Callers are expected never to pass max (assuming input data
     are in range).
     '''
     code_point = ord(char)
     return chr(code_point + 1)
Exemple #9
0
 def before(self, char):
     '''
     Return the character whose code point is one less than that of
     char.  Callers are expected never to pass min (assuming input data
     are in range).
     '''
     code_point = ord(char)
     return chr(code_point - 1)
Exemple #10
0
 def mkhex(char, n):
     # `char` is matched and dropped; the text that remains is read as a
     # base-16 code point and turned into that character.
     # NOTE(review): [n,...] presumably means n joined repetitions -
     # confirm against the lepl repetition syntax.
     from lepl.matchers.derived import Drop
     marker = Drop(Any(char))
     digits = Any('0123456789abcdefABCDEF')[n,...]
     return (marker + digits) >> (lambda text: chr(int(text, 16)))
Exemple #11
0
 def after(self, char):
     '''
     Return the character whose code point is one greater than that of
     char.  Callers are expected never to pass max (assuming input data
     are in range).
     '''
     successor = ord(char) + 1
     return chr(successor)
Exemple #12
0
 def before(self, char):
     '''
     Return the character whose code point is one less than that of
     char.  Callers are expected never to pass min (assuming input data
     are in range).
     '''
     predecessor = ord(char) - 1
     return chr(predecessor)
Exemple #13
0
 def mkhex(char, n):
     # `char` is matched and dropped; the text that remains is read as a
     # base-16 code point and turned into that character.
     # NOTE(review): [n,...] presumably means n joined repetitions -
     # confirm against the lepl repetition syntax.
     from lepl.matchers.derived import Drop
     prefix = Drop(Any(char))
     hex_digits = Any('0123456789abcdefABCDEF')[n,...]
     return (prefix + hex_digits) >> (lambda text: chr(int(text, 16)))