예제 #1
0
    def find_second_marks(self):
        """Split clusters that contain marks which must stand on their own.

        Every exemplar currently keyed in ``self.clusters`` is examined
        trailer by trailer.  ``self.split_exemplar`` is invoked for a trailer

        * when the trailer is already in ``self.always_separate_marks``, and
        * when the trailer has the same combining class (ccc) as the trailer
          directly before it; such a trailer is also added to
          ``self.always_separate_marks``.

        Both conditions are checked independently, so a single trailer can
        trigger two calls to ``self.split_exemplar``.
        """
        # Iterate over a copy of the keys rather than the live mapping
        # (presumably because split_exemplar changes self.clusters -- confirm).
        for exemplar in list(self.clusters.keys()):
            count = self.clusters[exemplar]
            earlier_mark = None
            for position in range(len(exemplar.trailers)):
                mark = exemplar.trailers[position]

                # A mark already known to be always separate forces a split.
                if mark in self.always_separate_marks:
                    self.split_exemplar(exemplar, position, count)

                # Only the second and later marks have a predecessor to be
                # compared with.  Two adjacent marks with an equal combining
                # class mean the later one is a stacking diacritic, which is
                # treated as a separate mark.
                if position > 0 and (
                        Char.getCombiningClass(mark)
                        == Char.getCombiningClass(earlier_mark)):
                    self.always_separate_marks.add(mark)
                    self.split_exemplar(exemplar, position, count)

                earlier_mark = mark
예제 #2
0
    def find_second_marks(self):
        """Split clusters that contain marks which must stand on their own.

        Every exemplar currently keyed in ``self.clusters`` is examined
        trailer by trailer.  ``self.split_exemplar`` is invoked for a trailer

        * when the trailer is already in ``self.always_separate_marks``, and
        * when the trailer has the same combining class (ccc) as the trailer
          directly before it; such a trailer is also added to
          ``self.always_separate_marks``.

        Both conditions are checked independently, so a single trailer can
        trigger two calls to ``self.split_exemplar``.
        """
        # Iterate over a copy of the keys rather than the live mapping
        # (presumably because split_exemplar changes self.clusters -- confirm).
        for exemplar in list(self.clusters.keys()):
            count = self.clusters[exemplar]
            earlier_mark = None
            for position in range(len(exemplar.trailers)):
                mark = exemplar.trailers[position]

                # A mark already known to be always separate forces a split.
                if mark in self.always_separate_marks:
                    self.split_exemplar(exemplar, position, count)

                # Only the second and later marks have a predecessor to be
                # compared with.  Two adjacent marks with an equal combining
                # class mean the later one is a stacking diacritic, which is
                # treated as a separate mark.
                if position > 0 and (
                        Char.getCombiningClass(mark)
                        == Char.getCombiningClass(earlier_mark)):
                    self.always_separate_marks.add(mark)
                    self.split_exemplar(exemplar, position, count)

                earlier_mark = mark
예제 #3
0
 def __init__(self, uids, basename, logger):
     """Store the character's identity and look up its ICU properties.

     Args:
         uids: Indexable collection of code points (integers, given the
             ``%04X`` formatting below).  Only ``uids[0]`` is used for the
             ICU lookups.
         basename: Stored unchanged as ``self.basename``.
         logger: Object whose ``log(message, severity)`` method is called
             with severity ``'W'`` when ``uids[0]`` is not defined in ICU.

     After construction ``self.general`` holds ``Char.charType(uids[0])``
     and ``self.cc`` holds ``Char.getCombiningClass(uids[0])``; both are
     ``None`` when ICU does not define the code point.
     """
     self.logger = logger
     self.uids = uids
     self.basename = basename
     # Give the ICU-derived attributes a value on every path so that later
     # reads cannot raise AttributeError for code points ICU does not know.
     self.general = None
     self.cc = None
     first_uid = uids[0]
     if Char.isdefined(first_uid):
         self.general = Char.charType(first_uid)
         self.cc = Char.getCombiningClass(first_uid)
     else:
         self.logger.log(
             'USV %04X not in ICU; no properties known' % first_uid, 'W')
     self.feats = set()  # feat tags that affect this char
     self.langs = set()  # lang tags that affect this char
예제 #4
0
 def __init__(self, uid, basename, logger):
     """Store the character's identity and look up its ICU properties.

     Args:
         uid: Code point of the character (an integer, given the ``%04X``
             formatting below).
         basename: Stored unchanged as ``self.basename``.
         logger: Object whose ``log(message, severity)`` method is called
             with severity ``'W'`` when ``uid`` is not defined in ICU.

     After construction ``self.general`` and ``self.icuGC`` both hold
     ``Char.charType(uid)``, ``self.cc`` holds
     ``Char.getCombiningClass(uid)`` and ``self.icuJT`` holds the
     ``UProperty.JOINING_TYPE`` property value.  All four are ``None``
     when ICU does not define the code point.
     """
     self.logger = logger
     self.uid = uid
     self.basename = basename
     # Give the ICU-derived attributes a value on every path so that later
     # reads cannot raise AttributeError for code points ICU does not know.
     self.general = self.cc = self.icuGC = self.icuJT = None
     if Char.isdefined(uid):
         # general and icuGC carry the same value; look it up only once.
         general_category = Char.charType(uid)
         self.general = general_category
         self.cc = Char.getCombiningClass(uid)
         self.icuGC = general_category
         self.icuJT = Char.getIntPropertyValue(uid, UProperty.JOINING_TYPE)
     else:
         self.logger.log('USV %04X not in ICU; no properties known' % uid,
                         'W')
     self.feats = set()  # feat tags that affect this char
     self.langs = set()  # lang tags that affect this char
     # Additional info from UFO:
     self.takesMarks = self.isMark = self.isBase = False
예제 #5
0
 def isnukta(char):
     """Report whether *char* is a nukta.

     A character is treated as a nukta exactly when
     ``Char.getCombiningClass`` reports combining class 7 for it.
     """
     return Char.getCombiningClass(char) == 7
예제 #6
0
 def isnukta(char):
     """Report whether *char* is a nukta.

     A character is treated as a nukta exactly when
     ``Char.getCombiningClass`` reports combining class 7 for it.
     """
     return Char.getCombiningClass(char) == 7