def __init__(self):
    '''
    Build the alphabet spanning chr(0) .. chr(maxunicode).

    Two helper matchers are handed to the parent constructor:

    - `escaped`: any character from ILLEGAL, or a hex escape introduced
      by 'x', 'u' or 'U' (followed by 2, 4 or 8 hex digits) that is
      turned into the character with that code point;
    - `range`: the 's' / 'S' shortcuts, mapped to the _WHITESPACE
      characters and to their inverse respectively.
    '''
    from lepl.matchers.core import Any
    from lepl.matchers.combine import Or

    def mkhex(char, n):
        # `char` (dropped from the result) followed by n hex digits,
        # converted to the character with that code point.
        from lepl.matchers.derived import Drop

        def to_char(hex_text):
            return chr(int(hex_text, 16))

        prefix = Drop(Any(char))
        digits = Any('0123456789abcdefABCDEF')[n, ...]
        return prefix + digits >> to_char

    def mkchr(char, members, invert=False):
        # Literal `char` mapped to a Character built from `members`.
        from lepl.matchers.core import Literal
        from lepl.matchers.derived import Map
        from lepl.regexp.core import Character

        intervals = lmap(lambda member: (member, member), members)

        def build(_):
            # self.invert() is only called here, when the matcher fires,
            # i.e. after creation of self has completed.
            if invert:
                return Character(self.invert(intervals), self)
            return Character(intervals, self)

        return Map(Literal(char), build)

    upper_bound = chr(maxunicode)
    shortcuts = Or(mkchr('s', _WHITESPACE),
                   mkchr('S', _WHITESPACE, invert=True))
    escapes = Any(ILLEGAL) | mkhex('x', 2) | mkhex('u', 4) | mkhex('U', 8)
    super(UnicodeAlphabet, self).__init__(chr(0), upper_bound,
                                          escaped=escapes, range=shortcuts)
def test_inline_flags(self):
    # Bug #1700: case-insensitive matching of a non-ASCII letter pair must
    # work in both directions, whether the flags are passed to compile()
    # or written inline in the pattern.
    capital = chr(0x1ea0)  # Latin Capital Letter A with Dot Below
    small = chr(0x1ea1)  # Latin Small Letter A with Dot Below

    def check(subject, *compile_args):
        # Compile with the given arguments and require a match on subject.
        compiled = self._re.compile(*compile_args)
        found = compiled.match(subject)
        self.assertNotEqual(found, None)

    # Flags supplied as the compile() argument.
    check(small, capital, self._re.I | self._re.U)
    check(capital, small, self._re.I | self._re.U)

    # Ignore-case inline, unicode as argument.
    check(small, '(?i)' + capital, self._re.U)
    check(capital, '(?i)' + small, self._re.U)

    # Both flags inline.
    check(small, '(?iu)' + capital)
    check(capital, '(?iu)' + small)
def test_re_escape(self):
    # Escaping the empty string must give back the empty string.
    empty = ""
    self.assertEqual(self._re.escape(empty), empty)

    # Each of the first 256 characters, once escaped, must match itself
    # and consume exactly one character.
    codes = range(0, 256)
    for code in codes:
        ch = chr(code)
        self.assertEqual(
            self._re.match(self._re.escape(ch), ch) is not None, True)
        self.assertEqual(
            self._re.match(self._re.escape(ch), ch).span(), (0, 1))

    # The same must hold for all 256 characters concatenated.
    everything = "".join(chr(code) for code in codes)
    compiled = self._re.compile(self._re.escape(everything))
    self.assertEqual(compiled.match(everything) is not None, True)
    self.assertEqual(compiled.match(everything).span(), (0, 256))
def test_basic_re_sub(self):
    # Exercises sub() with literal, callable and template replacements.
    #
    # Replacement templates and patterns containing regex escapes (\g<...>,
    # \s) are written as raw strings: as plain literals they relied on
    # Python passing the unrecognised escapes through, which is deprecated.
    # The runtime values are unchanged.
    sub = self._re.sub

    # Inline ignore-case flag and callable replacements (optionally with a
    # maximum replacement count).
    self.assertEqual(sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # A callable's return value is inserted verbatim; a template string
    # has its escapes processed.
    self.assertEqual(sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(sub('.', r"\n", 'x'), '\n')

    # Numeric back-references: expanded in a template, left alone when the
    # template is escaped or produced by a callable.
    s = r"\1\1"
    self.assertEqual(sub('(.)', s, 'x'), 'xx')
    self.assertEqual(sub('(.)', self._re.escape(s), 'x'), s)
    self.assertEqual(sub('(.)', lambda m: s, 'x'), s)

    # \g<name> and \g<number> group references.
    self.assertEqual(sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')

    # Character escapes in a template are converted; the other escapes are
    # expected to come through with their backslash intact.
    self.assertEqual(
        sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
        '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')

    # Here the replacement already contains the control characters
    # themselves (deliberately NOT a raw string).
    self.assertEqual(sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    # A pattern that matches the empty string at the start.
    self.assertEqual(sub(r'^\s*', 'X', 'test'), 'Xtest')
def test_sre_character_class_literals(self):
    # A character written as a numeric escape inside a character class
    # must match that character, whether the escape stands alone or is
    # followed by another class member ('0', '8' or 'z').
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        char = chr(i)
        # Three-digit octal escapes.
        self.assertNotEqual(self._re.match(u(r"[\%03o]") % i, char), None)
        self.assertNotEqual(self._re.match(u(r"[\%03o0]") % i, char), None)
        self.assertNotEqual(self._re.match(u(r"[\%03o8]") % i, char), None)
        if PYTHON3:
            # Two-digit hex escapes are only exercised when PYTHON3 is set
            # (the original note here just said "i give up").
            self.assertNotEqual(
                self._re.match(u(r"[\x%02x]") % i, char), None)
            self.assertNotEqual(
                self._re.match(u(r"[\x%02x0]") % i, char), None)
            self.assertNotEqual(
                self._re.match(u(r"[\x%02xz]") % i, char), None)
    # The engine must reject the escape \911.  Raw string: "\9" is not a
    # Python escape, so the plain literal only worked via the deprecated
    # pass-through of unrecognised escapes (same runtime value).
    self.assertRaises(self._re.error, self._re.match, u(r"[\911]"), "")
def test_sre_character_literals(self):
    # A character written as a numeric escape must match that character,
    # whether the escape stands alone or is followed by a literal
    # '0', '8' or 'z' that must also be matched.
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        char = chr(i)
        # Three-digit octal escapes.
        self.assertNotEqual(self._re.match(u(r"\%03o") % i, char), None)
        self.assertNotEqual(
            self._re.match(u(r"\%03o0") % i, char + "0"), None)
        self.assertNotEqual(
            self._re.match(u(r"\%03o8") % i, char + "8"), None)
        if PYTHON3:
            # Two-digit hex escapes are only exercised when PYTHON3 is set.
            self.assertNotEqual(
                self._re.match(u(r"\x%02x") % i, char), None)
            self.assertNotEqual(
                self._re.match(u(r"\x%02x0") % i, char + "0"), None)
            self.assertNotEqual(
                self._re.match(u(r"\x%02xz") % i, char + "z"), None)
    # The engine must reject the escape \911.  Raw string: "\9" is not a
    # Python escape, so the plain literal only worked via the deprecated
    # pass-through of unrecognised escapes (same runtime value).
    self.assertRaises(self._re.error, self._re.match, r"\911", "")
def after(self, char):
    '''
    Return the character that directly follows `char` in the alphabet,
    i.e. the one whose code point is one higher.

    Never called with max (assuming input data are in range).
    '''
    code_point = ord(char)
    return chr(code_point + 1)
def before(self, char):
    '''
    Return the character that directly precedes `char` in the alphabet,
    i.e. the one whose code point is one lower.

    Never called with min (assuming input data are in range).
    '''
    code_point = ord(char)
    return chr(code_point - 1)
def mkhex(char, n):
    '''
    Build a matcher for a hex escape: `char` (dropped from the result)
    followed by `n` hexadecimal digits, whose value is converted to the
    character with that code point.
    '''
    from lepl.matchers.derived import Drop

    def to_char(hex_text):
        return chr(int(hex_text, 16))

    prefix = Drop(Any(char))
    digits = Any('0123456789abcdefABCDEF')[n, ...]
    # `+` binds tighter than `>>`: the conversion applies to prefix + digits.
    return prefix + digits >> to_char
def after(self, char):
    '''
    Successor of `char` in the alphabet: the character one code point up.

    Never called with max (assuming input data are in range).
    '''
    successor = 1 + ord(char)
    return chr(successor)
def before(self, char):
    '''
    Predecessor of `char` in the alphabet: the character one code point
    down.

    Never called with min (assuming input data are in range).
    '''
    predecessor = ord(char) - 1
    return chr(predecessor)