def test_length(self):
    # Follow len() and str() of a class built from '0-9A-Z' while it is
    # complemented, extended with 'k-m' and 'K-M', and finally cleared.
    universe = sys.maxunicode + 1
    digits_and_upper = [(48, 58), (65, 91)]

    def assert_subsets(positive, negative):
        # Positive part is always checked before the negative one.
        self.assertListEqual(cc.positive.codepoints, positive)
        self.assertListEqual(cc.negative.codepoints, negative)

    cc = CharacterClass('0-9A-Z')
    assert_subsets(digits_and_upper, [])
    self.assertEqual(len(cc), 36)

    # After complement() the ranges are expected in the negative part.
    cc.complement()
    assert_subsets([], digits_and_upper)
    self.assertEqual(len(cc), universe - 36)

    # 'k-m' lies outside the negated ranges: the expected length is unchanged.
    cc.add('k-m')
    assert_subsets([(107, 110)], digits_and_upper)
    self.assertEqual(str(cc), '[\x00-/:-@\\[-\U0010ffffk-m]')
    self.assertEqual(len(cc), universe - 36)

    # 'K-M' overlaps the negated 'A-Z': three more code points are expected.
    cc.add('K-M')
    assert_subsets([(75, 78), (107, 110)], digits_and_upper)
    self.assertEqual(len(cc), universe - 33)
    self.assertEqual(str(cc), '[\x00-/:-@\\[-\U0010ffffK-Mk-m]')

    cc.clear()
    assert_subsets([], [])
    self.assertEqual(len(cc), 0)
def test_iterate(self):
    # Iterating a character class yields integers that chr() accepts:
    # first for 'A-Za-z', then for its complement.
    letters = CharacterClass('A-Za-z')
    iterated = ''.join(map(chr, letters))
    self.assertEqual(
        iterated, string.ascii_uppercase + string.ascii_lowercase
    )

    letters.complement()
    remaining = ''.join(map(chr, letters))
    self.assertEqual(
        len(remaining), sys.maxunicode + 1 - len(string.ascii_letters)
    )
def test_char_class_init(self):
    # Construction without arguments and from the 'a-z' range. The positive
    # and negative parts are compared directly against plain lists here
    # (not through their `.codepoints` attribute).
    empty = CharacterClass()
    self.assertEqual(empty.positive, [])
    self.assertEqual(empty.negative, [])

    lowercase = CharacterClass('a-z')
    self.assertEqual(lowercase.positive, [(97, 123)])
    self.assertEqual(lowercase.negative, [])
def test_in_operator(self):
    # Membership is tested with both integers and one-character strings,
    # before and after complementing the 'A-Za-z' class.
    letter_samples = (100, 'd')
    digit_samples = (49, '1')

    cc = CharacterClass('A-Za-z')
    for sample in letter_samples:
        self.assertIn(sample, cc)
    for sample in digit_samples:
        self.assertNotIn(sample, cc)

    # Complementing swaps the expected results.
    cc.complement()
    for sample in letter_samples:
        self.assertNotIn(sample, cc)
    for sample in digit_samples:
        self.assertIn(sample, cc)
def test_complement(self):
    # complement() is expected to move the 'a-z' range from the positive to
    # the negative part and to render the class as '[^a-z]'.
    a_to_z = [(97, 123)]

    cc = CharacterClass('a-z')
    self.assertListEqual(cc.positive.codepoints, a_to_z)
    self.assertListEqual(cc.negative.codepoints, [])

    cc.complement()
    self.assertListEqual(cc.positive.codepoints, [])
    self.assertListEqual(cc.negative.codepoints, a_to_z)
    self.assertEqual(str(cc), '[^a-z]')

    # The complement of an empty class has sys.maxunicode + 1 members.
    everything = CharacterClass()
    everything.complement()
    self.assertEqual(len(everything), sys.maxunicode + 1)
def character_class_objects():
    """Return a list of 10000 CharacterClass instances built from r'\\c'."""
    pattern = r'\c'
    total = 10000
    return [CharacterClass(pattern) for _ in range(total)]
def run_timeit(stmt='pass', setup='pass', number=1000):
    """Time `stmt` with timeit() and print the statement with the seconds.

    :param stmt: the statement passed to timeit().
    :param setup: the setup code passed to timeit().
    :param number: the number of executions passed to timeit().
    """
    elapsed = timeit(stmt, setup=setup, number=number)
    report = "{}: {}s".format(stmt, elapsed)
    print(report)


@profile
def character_class_objects():
    """Return a list of 10000 CharacterClass instances built from r'\\c'."""
    pattern = r'\c'
    total = 10000
    return [CharacterClass(pattern) for _ in range(total)]


if __name__ == '__main__':
    # Banner: five lines followed by an empty one.
    print(
        '*' * 62,
        "*** Memory and timing profile of CharacterClass class ***",
        "***" + ' ' * 56 + "***",
        "*** Note: save ~15% of memory with __slots__ (from v2.2.3) ***",
        '*' * 62,
        '',
        sep='\n',
    )

    # Runs the @profile-decorated builder defined above.
    character_class_objects()

    # Module-level name: the timed statements import it from __main__.
    character_class = CharacterClass(r'\c')
    character_class -= CharacterClass(r'\i')

    SETUP = 'from __main__ import character_class'
    NUMBER = 10000

    run_timeit('"9" in character_class # True ', setup=SETUP, number=NUMBER)
    run_timeit('"q" in character_class # False', setup=SETUP, number=NUMBER)
    run_timeit('8256 in character_class # True ', setup=SETUP, number=NUMBER)
    run_timeit('8257 in character_class # False', setup=SETUP, number=NUMBER)
def test_discard(self):
    # Exercise discard() with plain ranges, \p{..}/\P{..} properties,
    # invalid property names and the \n, \s, \S escapes, on both normal
    # and complemented classes.
    max_cp = sys.maxunicode
    universe = max_cp + 1

    def assert_subsets(positive, negative):
        # Positive part is always checked before the negative one.
        self.assertListEqual(cc.positive.codepoints, positive)
        self.assertListEqual(cc.negative.codepoints, negative)

    cc = CharacterClass('0-9')
    cc.discard('6-9')
    assert_subsets([(48, 54)], [])
    self.assertEqual(len(cc), 6)

    cc.add(r'\p{Nd}')
    self.assertEqual(len(cc), 630)
    cc.discard(r'\p{Nd}')
    self.assertEqual(len(cc), 0)

    # Empty and unknown property names must raise RegexError.
    for invalid in (r'\p{}', r'\p{XYZ}'):
        with self.assertRaises(RegexError):
            cc.discard(invalid)

    cc.add(r'\P{Nd}')
    self.assertEqual(len(cc), universe - 630)
    cc.discard(r'\P{Nd}')
    self.assertEqual(len(cc), 0)

    # Discarding the '\p{IsFoo}' block is expected to empty 'a-z'.
    cc = CharacterClass('a-z')
    cc.discard(r'\p{IsFoo}')
    self.assertEqual(len(cc), 0)

    # Start from the complement of an empty class and remove escapes.
    cc = CharacterClass()
    cc.complement()
    cc.discard('\\n')
    assert_subsets([(0, 10), (11, 1114112)], [])
    self.assertEqual(len(cc), max_cp)

    cc.discard('\\s')
    self.assertListEqual(
        cc.positive.codepoints, [(0, 9), (11, 13), (14, 32), (33, 1114112)]
    )
    self.assertEqual(len(cc), max_cp - 3)

    cc.discard('\\S')
    self.assertEqual(len(cc), 0)

    # Put a code point straight into the negative part, then discard \s.
    cc.clear()
    cc.negative.codepoints.append(10)
    cc.discard('\\s')
    assert_subsets([], [(9, 11), 13, 32])

    # Discarding from a complemented class is expected to grow its
    # negative part.
    cc = CharacterClass('\t')
    cc.complement()
    self.assertListEqual(cc.negative.codepoints, [9])
    cc.discard('\\n')
    assert_subsets([], [(9, 11)])
    self.assertEqual(len(cc), max_cp - 1)
def test_add(self):
    # Exercise add() with a plain range, \p{..}/\P{..} properties and
    # invalid property names.
    def assert_subsets(positive, negative):
        # Positive part is always checked before the negative one.
        self.assertListEqual(cc.positive.codepoints, positive)
        self.assertListEqual(cc.negative.codepoints, negative)

    cc = CharacterClass()
    assert_subsets([], [])
    self.assertEqual(len(cc), 0)

    cc.add('0-9')
    assert_subsets([(48, 58)], [])
    self.assertEqual(len(cc), 10)

    cc.add(r'\p{Nd}')
    self.assertEqual(len(cc), 630)

    # Empty and unknown property names must raise RegexError.
    for invalid in (r'\p{}', r'\p{XYZ}'):
        with self.assertRaises(RegexError):
            cc.add(invalid)

    # \p{Nd} plus \P{Nd} is expected to cover every code point.
    cc.add(r'\P{Nd}')
    self.assertEqual(len(cc), sys.maxunicode + 1)

    # No assertion follows: this only checks that adding the '\p{IsFoo}'
    # block name does not raise.
    cc = CharacterClass()
    cc.add(r'\p{IsFoo}')
def test_isub_operator(self):
    # In-place subtraction (-=) with plain and complemented operands; each
    # result is checked through its string form.

    # Plain minus plain.
    minuend = CharacterClass('A-Za-z')
    minuend -= CharacterClass('a-z')
    self.assertEqual(str(minuend), '[A-Z]')

    # Plain minus complemented.
    minuend = CharacterClass('a-z')
    subtrahend = CharacterClass('A-Za-c')
    subtrahend.complement()
    minuend -= subtrahend
    self.assertEqual(str(minuend), '[a-c]')

    # Plain minus a complemented class that also has 'b' added back.
    minuend = CharacterClass('a-z')
    subtrahend = CharacterClass('A-Za-c')
    subtrahend.complement()
    subtrahend.add('b')
    minuend -= subtrahend
    self.assertEqual(str(minuend), '[ac]')

    # Complemented minus complemented.
    minuend = CharacterClass('a-c')
    minuend.complement()
    subtrahend = CharacterClass('a-z')
    subtrahend.complement()
    minuend -= subtrahend
    self.assertEqual(str(minuend), '[d-z]')
def test_char_class_repr(self):
    # repr() is expected to wrap the bracketed string form of the class,
    # both before and after complementing it.
    lowercase = CharacterClass('a-z')
    plain_repr = repr(lowercase)
    self.assertEqual(plain_repr, 'CharacterClass([a-z])')

    lowercase.complement()
    negated_repr = repr(lowercase)
    self.assertEqual(negated_repr, 'CharacterClass([^a-z])')