Example #1
0
    def test_length(self):
        """Check len() of a CharacterClass across complement, add and clear."""
        digits_and_upper = [(48, 58), (65, 91)]  # ranges expected for '0-9A-Z'
        full_size = sys.maxunicode + 1

        cc = CharacterClass('0-9A-Z')
        self.assertListEqual(cc.positive.codepoints, digits_and_upper)
        self.assertListEqual(cc.negative.codepoints, [])
        self.assertEqual(len(cc), 36)

        # After complement() the ranges are expected on the negative side.
        cc.complement()
        self.assertListEqual(cc.positive.codepoints, [])
        self.assertListEqual(cc.negative.codepoints, digits_and_upper)
        self.assertEqual(len(cc), full_size - 36)

        # 'k-m' lies outside the negated ranges: the expected length is unchanged.
        cc.add('k-m')
        self.assertListEqual(cc.positive.codepoints, [(107, 110)])
        self.assertListEqual(cc.negative.codepoints, digits_and_upper)
        self.assertEqual(str(cc), '[\x00-/:-@\\[-\U0010ffffk-m]')
        self.assertEqual(len(cc), full_size - 36)

        # 'K-M' lies inside the negated 'A-Z' range: three more code points expected.
        cc.add('K-M')
        self.assertListEqual(cc.positive.codepoints, [(75, 78), (107, 110)])
        self.assertListEqual(cc.negative.codepoints, digits_and_upper)
        self.assertEqual(len(cc), full_size - 33)
        self.assertEqual(str(cc), '[\x00-/:-@\\[-\U0010ffffK-Mk-m]')

        cc.clear()
        self.assertListEqual(cc.positive.codepoints, [])
        self.assertListEqual(cc.negative.codepoints, [])
        self.assertEqual(len(cc), 0)
Example #2
0
    def test_iterate(self):
        """Iterate a class and its complement, checking the code points yielded."""
        letters = CharacterClass('A-Za-z')
        iterated = ''.join(map(chr, letters))
        self.assertEqual(iterated,
                         string.ascii_uppercase + string.ascii_lowercase)

        # The complemented class is expected to yield every other code point.
        letters.complement()
        complement_size = len(''.join(map(chr, letters)))
        self.assertEqual(complement_size,
                         sys.maxunicode + 1 - len(string.ascii_letters))
Example #3
0
    def test_char_class_init(self):
        """Check the positive/negative parts right after construction."""
        # Without arguments both parts are expected to compare equal to [].
        empty_class = CharacterClass()
        self.assertEqual(empty_class.positive, [])
        self.assertEqual(empty_class.negative, [])

        # 'a-z' is expected as a single (start, stop) pair on the positive part.
        lowercase_class = CharacterClass('a-z')
        self.assertEqual(lowercase_class.positive, [(97, 123)])
        self.assertEqual(lowercase_class.negative, [])
Example #4
0
    def test_in_operator(self):
        """Check ``in`` with both code points (int) and characters (str)."""
        letter_members = (100, 'd')   # code point 100 is 'd'
        digit_members = (49, '1')     # code point 49 is '1'

        letters = CharacterClass('A-Za-z')
        for member in letter_members:
            self.assertIn(member, letters)
        for member in digit_members:
            self.assertNotIn(member, letters)

        # After complement() the membership results are expected to flip.
        letters.complement()
        for member in letter_members:
            self.assertNotIn(member, letters)
        for member in digit_members:
            self.assertIn(member, letters)
Example #5
0
    def test_complement(self):
        """Check that complement() moves ranges between positive and negative."""
        lowercase_range = [(97, 123)]

        lowercase = CharacterClass('a-z')
        self.assertListEqual(lowercase.positive.codepoints, lowercase_range)
        self.assertListEqual(lowercase.negative.codepoints, [])

        lowercase.complement()
        self.assertListEqual(lowercase.positive.codepoints, [])
        self.assertListEqual(lowercase.negative.codepoints, lowercase_range)
        self.assertEqual(str(lowercase), '[^a-z]')

        # Complementing an empty class is expected to cover all code points.
        everything = CharacterClass()
        everything.complement()
        self.assertEqual(len(everything), sys.maxunicode + 1)
def character_class_objects():
    """Build and return a list of 10000 CharacterClass instances.

    NOTE(review): this file defines ``character_class_objects`` again further
    down (decorated with ``@profile``); that later definition replaces this
    one when the module is executed.
    """
    instances = []
    for _ in range(10000):
        instances.append(CharacterClass(r'\c'))
    return instances
def run_timeit(stmt='pass', setup='pass', number=1000):
    """Time a statement with ``timeit`` and print the result.

    :param stmt: the statement to time; it is also used as the printed label.
    :param setup: setup code passed through to ``timeit``.
    :param number: how many times ``timeit`` executes the statement.
    """
    elapsed = timeit(stmt, setup=setup, number=number)
    report = "{}: {}s".format(stmt, elapsed)
    print(report)


@profile
def character_class_objects():
    """Build and return a list of 10000 CharacterClass instances.

    The function is wrapped by ``profile``, so the allocation of the objects
    is what gets measured when it is called.
    """
    how_many = 10000
    pattern = r'\c'
    return [CharacterClass(pattern) for _ in range(how_many)]


if __name__ == '__main__':
    # Banner: a 62-column box of asterisks around the title and the note.
    banner_rule = '*' * 62
    for banner_line in (
        banner_rule,
        "*** Memory and timing profile of CharacterClass class      ***",
        "***" + ' ' * 56 + "***",
        "*** Note: save ~15% of memory with __slots__ (from v2.2.3) ***",
        banner_rule,
    ):
        print(banner_line)
    print()

    # Memory part: create the objects through the profiled function.
    character_class_objects()

    # Timing part: the setup string imports ``character_class`` from
    # ``__main__``, so it has to stay a module-level name.
    character_class = CharacterClass(r'\c')
    character_class -= CharacterClass(r'\i')
    SETUP = 'from __main__ import character_class'
    NUMBER = 10000

    # The trailing '# True' / '# False' text is part of each timed statement
    # and therefore also shows up in the printed label.
    for timed_statement in (
        '"9" in character_class   # True ',
        '"q" in character_class   # False',
        '8256 in character_class  # True ',
        '8257 in character_class  # False',
    ):
        run_timeit(timed_statement, SETUP, NUMBER)
Example #8
0
    def test_discard(self):
        """Exercise discard() with ranges, categories, escapes and complements."""
        full_size = sys.maxunicode + 1

        chars = CharacterClass('0-9')
        chars.discard('6-9')
        self.assertListEqual(chars.positive.codepoints, [(48, 54)])
        self.assertListEqual(chars.negative.codepoints, [])
        self.assertEqual(len(chars), 6)

        # Adding then discarding the Nd category: 630 code points expected,
        # then none.
        chars.add(r'\p{Nd}')
        self.assertEqual(len(chars), 630)

        chars.discard(r'\p{Nd}')
        self.assertEqual(len(chars), 0)

        # An empty or unknown property name is expected to raise RegexError.
        for invalid_property in (r'\p{}', r'\p{XYZ}'):
            with self.assertRaises(RegexError):
                chars.discard(invalid_property)

        chars.add(r'\P{Nd}')
        self.assertEqual(len(chars), full_size - 630)

        chars.discard(r'\P{Nd}')
        self.assertEqual(len(chars), 0)

        # Discarding the block name 'IsFoo' is expected to empty the class.
        chars = CharacterClass('a-z')
        chars.discard(r'\p{IsFoo}')
        self.assertEqual(len(chars), 0)

        # Start from the complement of an empty class and remove escapes.
        chars = CharacterClass()
        chars.complement()
        chars.discard('\\n')
        self.assertListEqual(chars.positive.codepoints,
                             [(0, 10), (11, 1114112)])
        self.assertListEqual(chars.negative.codepoints, [])
        self.assertEqual(len(chars), sys.maxunicode)
        chars.discard('\\s')
        self.assertListEqual(chars.positive.codepoints,
                             [(0, 9), (11, 13), (14, 32), (33, 1114112)])
        self.assertEqual(len(chars), sys.maxunicode - 3)
        chars.discard('\\S')
        self.assertEqual(len(chars), 0)

        # The negative part is seeded by hand with the bare code point 10
        # before discarding '\s'.
        chars.clear()
        chars.negative.codepoints.append(10)
        chars.discard('\\s')
        self.assertListEqual(chars.positive.codepoints, [])
        self.assertListEqual(chars.negative.codepoints, [(9, 11), 13, 32])

        # Discarding from a complemented class is expected to extend the
        # negative part.
        chars = CharacterClass('\t')
        chars.complement()
        self.assertListEqual(chars.negative.codepoints, [9])
        chars.discard('\\n')
        self.assertListEqual(chars.positive.codepoints, [])
        self.assertListEqual(chars.negative.codepoints, [(9, 11)])
        self.assertEqual(len(chars), sys.maxunicode - 1)
Example #9
0
    def test_add(self):
        """Exercise add() with a range, Unicode categories and invalid names."""
        chars = CharacterClass()
        self.assertListEqual(chars.positive.codepoints, [])
        self.assertListEqual(chars.negative.codepoints, [])
        self.assertEqual(len(chars), 0)

        chars.add('0-9')
        self.assertListEqual(chars.positive.codepoints, [(48, 58)])
        self.assertListEqual(chars.negative.codepoints, [])
        self.assertEqual(len(chars), 10)

        # With the Nd category added the class is expected to hold 630
        # code points.
        chars.add(r'\p{Nd}')
        self.assertEqual(len(chars), 630)

        # An empty or unknown property name is expected to raise RegexError.
        for invalid_property in (r'\p{}', r'\p{XYZ}'):
            with self.assertRaises(RegexError):
                chars.add(invalid_property)

        # Adding the negated category as well is expected to cover everything.
        chars.add(r'\P{Nd}')
        self.assertEqual(len(chars), sys.maxunicode + 1)

        # NOTE(review): no assertion follows this call in the visible code, so
        # it only checks that adding the block name 'IsFoo' does not raise.
        chars = CharacterClass()
        chars.add(r'\p{IsFoo}')
Example #10
0
    def test_isub_operator(self):
        """Check ``-=`` between plain and complemented character classes."""
        # Plain minus plain.
        minuend = CharacterClass('A-Za-z')
        minuend -= CharacterClass('a-z')
        self.assertEqual(str(minuend), '[A-Z]')

        # Plain minus a complemented class.
        minuend = CharacterClass('a-z')
        subtrahend = CharacterClass('A-Za-c')
        subtrahend.complement()
        minuend -= subtrahend
        self.assertEqual(str(minuend), '[a-c]')

        # Same as above, with 'b' added to the complemented class first.
        minuend = CharacterClass('a-z')
        subtrahend = CharacterClass('A-Za-c')
        subtrahend.complement()
        subtrahend.add('b')
        minuend -= subtrahend
        self.assertEqual(str(minuend), '[ac]')

        # Complemented minus complemented.
        minuend = CharacterClass('a-c')
        minuend.complement()
        subtrahend = CharacterClass('a-z')
        subtrahend.complement()
        minuend -= subtrahend
        self.assertEqual(str(minuend), '[d-z]')
Example #11
0
 def test_char_class_repr(self):
     """Check repr() before and after complement()."""
     lowercase = CharacterClass('a-z')
     plain_repr = repr(lowercase)
     self.assertEqual(plain_repr, 'CharacterClass([a-z])')

     lowercase.complement()
     complemented_repr = repr(lowercase)
     self.assertEqual(complemented_repr, 'CharacterClass([^a-z])')