Example #1
0
    def test_length(self):
        """Check ``len()`` of a CharacterClass across complement/add/clear.

        The expected values below treat each codepoints entry as a
        half-open ``(start, stop)`` range: ``(48, 58)`` plus ``(65, 91)``
        is expected to count as 10 + 26 = 36 characters.
        """
        digits_and_upper = [(48, 58), (65, 91)]
        whole_range = sys.maxunicode + 1

        cc = CharacterClass('0-9A-Z')
        self.assertListEqual(cc.positive.codepoints, digits_and_upper)
        self.assertListEqual(cc.negative.codepoints, [])
        self.assertEqual(len(cc), 36)

        # After complementing, the same ranges are expected on the negative
        # side and the length becomes "everything except those 36".
        cc.complement()
        self.assertListEqual(cc.positive.codepoints, [])
        self.assertListEqual(cc.negative.codepoints, digits_and_upper)
        self.assertEqual(len(cc), whole_range - 36)

        # 'k-m' does not overlap the negated ranges, so the expected length
        # stays the same even though a positive range is recorded.
        cc.add('k-m')
        self.assertListEqual(cc.positive.codepoints, [(107, 110)])
        self.assertListEqual(cc.negative.codepoints, digits_and_upper)
        self.assertEqual(str(cc), '[\x00-/:-@\\[-\U0010ffffk-m]')
        self.assertEqual(len(cc), whole_range - 36)

        # 'K-M' lies inside the negated 'A-Z' range: three characters are
        # expected to be counted again (36 excluded -> 33 excluded).
        cc.add('K-M')
        self.assertListEqual(cc.positive.codepoints, [(75, 78), (107, 110)])
        self.assertListEqual(cc.negative.codepoints, digits_and_upper)
        self.assertEqual(len(cc), whole_range - 33)
        self.assertEqual(str(cc), '[\x00-/:-@\\[-\U0010ffffK-Mk-m]')

        # clear() is expected to empty both sides.
        cc.clear()
        self.assertListEqual(cc.positive.codepoints, [])
        self.assertListEqual(cc.negative.codepoints, [])
        self.assertEqual(len(cc), 0)
Example #2
0
    def test_discard(self):
        """Exercise ``CharacterClass.discard()`` on several kinds of input.

        Covers plain ranges, ``\\p{..}`` / ``\\P{..}`` property escapes,
        invalid property names, and the ``\\n`` / ``\\s`` / ``\\S`` escapes
        on both regular and complemented classes.
        """
        max_cp = sys.maxunicode

        # Plain range: removing '6-9' from '0-9' should leave '0'..'5'.
        digits = CharacterClass('0-9')
        digits.discard('6-9')
        self.assertListEqual(digits.positive.codepoints, [(48, 54)])
        self.assertListEqual(digits.negative.codepoints, [])
        self.assertEqual(len(digits), 6)

        # Adding then discarding the same property should end up empty.
        digits.add(r'\p{Nd}')
        self.assertEqual(len(digits), 630)

        digits.discard(r'\p{Nd}')
        self.assertEqual(len(digits), 0)

        # An empty or unknown property name is expected to be rejected.
        for bad_property in (r'\p{}', r'\p{XYZ}'):
            with self.assertRaises(RegexError):
                digits.discard(bad_property)

        # Same round trip with the negated property escape.
        digits.add(r'\P{Nd}')
        self.assertEqual(len(digits), max_cp + 1 - 630)

        digits.discard(r'\P{Nd}')
        self.assertEqual(len(digits), 0)

        # NOTE(review): the expected result implies that discarding an
        # unknown 'Is' block removes everything - confirm against the
        # CharacterClass implementation.
        letters = CharacterClass('a-z')
        letters.discard(r'\p{IsFoo}')
        self.assertEqual(len(letters), 0)

        # Start from the complement of an empty class and remove escapes
        # one at a time.
        everything = CharacterClass()
        everything.complement()
        everything.discard('\\n')
        self.assertListEqual(everything.positive.codepoints,
                             [(0, 10), (11, 1114112)])
        self.assertListEqual(everything.negative.codepoints, [])
        self.assertEqual(len(everything), max_cp)
        everything.discard('\\s')
        self.assertListEqual(everything.positive.codepoints,
                             [(0, 9), (11, 13), (14, 32), (33, 1114112)])
        self.assertEqual(len(everything), max_cp - 3)
        everything.discard('\\S')
        self.assertEqual(len(everything), 0)

        # Seed the negative part by hand with a bare code point, then check
        # how discard('\\s') is expected to merge into it.
        everything.clear()
        everything.negative.codepoints.append(10)
        everything.discard('\\s')
        self.assertListEqual(everything.positive.codepoints, [])
        self.assertListEqual(everything.negative.codepoints,
                             [(9, 11), 13, 32])

        # Complement of a single tab: discarding '\n' should extend the
        # negative entry to cover both code points 9 and 10.
        all_but_tab = CharacterClass('\t')
        all_but_tab.complement()
        self.assertListEqual(all_but_tab.negative.codepoints, [9])
        all_but_tab.discard('\\n')
        self.assertListEqual(all_but_tab.positive.codepoints, [])
        self.assertListEqual(all_but_tab.negative.codepoints, [(9, 11)])
        self.assertEqual(len(all_but_tab), max_cp - 1)