def test_add_variant_sequence(self):
    # A variant of a sequence may be registered once per distinct
    # (when, not_when) combination; registering the same combination a
    # second time must raise VariantAlreadyExists.
    variant_key = (0x0030,)
    sequence = CharSequence([0x002A, 0x002B])

    # Unconditional variant: stored under its code point tuple.
    sequence.add_variant([0x0030])
    self.assertEqual(len(sequence._variants), 1)
    self.assertIn(variant_key, sequence._variants)
    self.assertEqual(len(sequence._variants[variant_key]), 1)
    self.assertEqual(sequence._variants[variant_key][0].cp, variant_key)
    with self.assertRaises(VariantAlreadyExists):
        sequence.add_variant([0x0030])

    # Same code point, but each new set of conditions is accepted once.
    condition_sets = (
        {'when': 'w1'},
        {'not_when': 'w1'},
        {'when': 'w1', 'not_when': 'nw-1'},
    )
    for conditions in condition_sets:
        sequence.add_variant([0x0030], **conditions)
        with self.assertRaises(VariantAlreadyExists):
            sequence.add_variant([0x0030], **conditions)

    # One unconditional entry plus the three conditional ones.
    self.assertEqual(len(sequence._variants[variant_key]), 4)
def test_all_repertoire(self):
    # all_repertoire() is checked with every combination of the
    # include_ranges / include_sequences switches.
    self.cd.add_char([0x0010])
    self.cd.add_range(0x0001, 0x0005)
    self.cd.add_char([0x0000])
    self.cd.add_char([0x0011, 0x0012])
    self.cd.add_char([0x0013])

    # Building blocks of the expected results.
    single_chars = {Char(0x0000), Char(0x0010), Char(0x0013)}
    range_chars = {
        RangeChar(cp, 0x0001, 0x0005) for cp in range(0x0001, 0x0005 + 1)
    }
    sequences = {CharSequence([0x0011, 0x0012])}

    # Full output
    everything = set(self.cd.all_repertoire())
    self.assertEqual(everything, single_chars | range_chars | sequences)

    # Exclude ranges
    without_ranges = set(self.cd.all_repertoire(include_ranges=False))
    self.assertEqual(without_ranges, single_chars | sequences)

    # Exclude sequences
    without_sequences = set(
        self.cd.all_repertoire(include_sequences=False))
    self.assertEqual(without_sequences, single_chars | range_chars)

    # Exclude ranges and sequences
    singles_only = set(
        self.cd.all_repertoire(include_ranges=False,
                               include_sequences=False))
    self.assertEqual(singles_only, single_chars)
def test_as_index(self):
    # Every object built below is expected to report 0x002A as its index.
    expected_index = 0x002A

    def check(char):
        self.assertEqual(char.as_index(), expected_index)

    # Direct construction.
    check(Char(0x002A))
    check(CharSequence((0x002A, 0x002B)))
    check(RangeChar(0x002A, 0x002A, 0x002C))

    # Construction through the CharBase factory.
    check(CharBase.from_cp_or_sequence(0x002A))
    check(CharBase.from_cp_or_sequence([0x002A]))
    check(CharBase.from_cp_or_sequence([0x002A, 0x002B]))
def test_lgr_sequence(self):
    # Two sequences are added to the LGR; the reported stats must match
    # the baseline STATS with the sequence-related entries overridden.
    for sequence in ([0x0061, 0x0062, 0x0063], [0x0061, 0x0062]):
        self.lgr.add_cp(sequence)

    _unused, report = compute_stats(self.lgr, {})

    expected_stats = self.STATS.copy()
    expected_stats.update({
        'codepoint_number': 2,
        'sequence_number': 2,
        'largest_sequence': CharSequence(
            cp_or_sequence=(0x0061, 0x0062, 0x0063)),
        'largest_sequence_len': 3,
    })
    expected_report = {
        'description': 'Generate stats',
        'stats': expected_stats,
    }
    self.assertDictEqual(report, expected_report)
def test_iter(self):
    # Entries are inserted out of order; iterating the container is
    # expected to yield them in the order listed below, with the range
    # appearing as a single RangeChar item.
    self.cd.add_char([0x0010])
    self.cd.add_range(0x0001, 0x000A)
    for cp_or_sequence in ([0x0000], [0x0011, 0x0012], [0x0013]):
        self.cd.add_char(cp_or_sequence)

    iterated = list(self.cd)

    self.assertEqual(iterated, [
        Char(0x0000),
        RangeChar(0x0001, 0x0001, 0x000A),
        Char(0x0010),
        CharSequence([0x0011, 0x0012]),
        Char(0x0013),
    ])
def test_hash_sequence(self):
    # Sequences with identical code points must hash identically; the
    # test also expects differing content, length or order to give
    # differing hashes.
    base = CharSequence((0x002A, 0x002B))
    same_as_base = CharSequence((0x002A, 0x002B))
    other_tail = CharSequence((0x002A, 0x002C))
    longer = CharSequence((0x002A, 0x002B, 0x002C))
    longer_reordered = CharSequence((0x002A, 0x002C, 0x002B))

    self.assertEqual(base.__hash__(), same_as_base.__hash__())

    differing_pairs = (
        (base, other_tail),
        (base, longer),
        (base, longer_reordered),
        (other_tail, longer),
        (other_tail, longer_reordered),
        (longer, longer_reordered),
    )
    for left, right in differing_pairs:
        self.assertNotEqual(left.__hash__(), right.__hash__())