Ejemplo n.º 1
0
 def __init__(self, uid, basename, logger):
     """Store identity for one character and look up its ICU properties.

     uid      -- codepoint; queried through ICU ``Char`` and formatted
                 with ``%04X`` in the warning message.
     basename -- stored unchanged on the instance.
     logger   -- object providing ``log(message, severity)``.

     When ICU does not define ``uid`` a 'W' message is logged and the
     ICU-derived attributes (``general``, ``cc``, ``icuGC``, ``icuJT``,
     ``icuJG``) are left unset on the instance.
     """
     self.logger, self.uid, self.basename = logger, uid, basename

     if not Char.isdefined(uid):
         self.logger.log(
             'USV %04X not in ICU; no properties known' % uid, 'W')
     else:
         # ``general`` and ``icuGC`` both hold the ICU character type.
         self.general = self.icuGC = Char.charType(uid)
         self.cc = Char.getCombiningClass(uid)
         self.icuJT = Char.getIntPropertyValue(uid, UProperty.JOINING_TYPE)
         self.icuJG = Char.getIntPropertyValue(uid, UProperty.JOINING_GROUP)

     # Feature and language tags that affect this character.
     self.feats = set()
     self.langs = set()

     # Extra flags to be filled in from the UFO; all start out False.
     self.takesMarks = False
     self.isMark = False
     self.isBase = False
Ejemplo n.º 2
0
    def is_exemplar_wordbreak(char):
        """Return True when ``char`` has Word_Break Katakana, ALetter, or MidLetter."""

        # PyICU does not appear to expose named constants for the
        # Word_Break values, so the numeric values needed here are spelled
        # out. Other values exist but are not used by this function.
        accepted_wordbreak_values = (
            1,  # ALetter
            3,  # Katakana
            4,  # MidLetter
        )

        wordbreak_value = Char.getIntPropertyValue(char, UProperty.WORD_BREAK)
        return wordbreak_value in accepted_wordbreak_values
Ejemplo n.º 3
0
    def is_exemplar_wordbreak(char):
        """Return True if the character has the Word_Break property Katakana or ALetter.

        MidLetter is not accepted by this variant: the check for it
        (numeric value 4) is disabled.
        """

        # The following should be exposed by PyICU, but does not seem to be
        # implemented. There are other values, but these are the ones needed
        # for this function.
        WB_ALETTER = 1
        WB_KATAKANA = 3

        numeric_wordbreak_type = Char.getIntPropertyValue(char, UProperty.WORD_BREAK)
        return numeric_wordbreak_type in (WB_KATAKANA, WB_ALETTER)
Ejemplo n.º 4
0
 def render(self, uids, ftml, keyUID=0, addBreaks=True, rtl=None):
     """General-purpose (optional) helper that emits ftml for a character sequence.

     uids      -- sequence of codepoints; the caller's sequence is not
                  modified. Nothing is emitted when it is empty.
     ftml      -- object providing ``addToTest(...)`` and ``closeTest()``.
     keyUID    -- first argument passed to ``ftml.addToTest``; when 0 the
                  first codepoint of ``uids`` is used instead.
     addBreaks -- when true, ``ftml.closeTest()`` is called before and
                  after the contextual-form and mirrored/bidi outputs.
     rtl       -- passed through unchanged to ``ftml.addToTest``.
     """
     if len(uids) == 0:
         return

     # Private copy: brackets / a base character may be added below.
     seq = list(uids)
     firstUID = seq[0]
     originalCount = len(seq)
     if keyUID == 0:
         keyUID = firstUID

     # Label comes from the codepoints, comment from the glyph names.
     label = '\n'.join('U+{0:04X}'.format(u) for u in seq)
     comment = ' '.join(self._charFromUID[u].basename for u in seq)

     # Does the sequence include a mirrored character?
     hasMirrored = any(Char.isMirrored(u) for u in seq)

     def joiningType(u):
         return Char.getIntPropertyValue(u, UProperty.JOINING_TYPE)

     def joinsOnSide(u, typeIfLTR, typeOtherwise):
         # Dual-joining always qualifies; otherwise the joining type that
         # qualifies depends on whether the character is left-to-right.
         jt = joiningType(u)
         if jt == DUAL_JOINING:
             return True
         if Char.charDirection(u) == UCharDirection.LEFT_TO_RIGHT:
             return jt == typeIfLTR
         return jt == typeOtherwise

     # Look at the first and last characters that are not TRANSPARENT to
     # decide whether examples with zwj are needed.
     joiners = [u for u in seq if joiningType(u) != TRANSPARENT]
     if joiners:
         zwjBefore = joinsOnSide(joiners[0], LEFT_JOINING, RIGHT_JOINING)
         zwjAfter = joinsOnSide(joiners[-1], RIGHT_JOINING, LEFT_JOINING)
     else:
         zwjBefore = False
         zwjAfter = False

     if Char.charType(firstUID) == UCharCategory.NON_SPACING_MARK:
         # Leading non-spacing mark: give it a suitable base, after which
         # a zwj in front is no longer needed.
         seq.insert(0, self.diacBase)
         zwjBefore = False
     elif Char.isUWhiteSpace(firstUID):
         # Leading whitespace: prefix with baseline bracket.
         seq.insert(0, 0xF130)

     nonMarks = [
         u for u in seq
         if Char.charType(u) != UCharCategory.NON_SPACING_MARK
     ]
     if Char.isUWhiteSpace(nonMarks[-1]):
         # Last non-mark is whitespace: append baseline bracket.
         seq.append(0xF131)

     s = ''.join(map(chr, seq))

     # Work out what text to emit, whether it is fenced by closeTest()
     # calls, and whether it carries the label.
     if zwjBefore or zwjAfter:
         # Contextual forms.
         forms = [u'{0} '.format(s)]
         if zwjAfter:
             forms.append(u'{0}\u200D '.format(s))
             if zwjBefore:
                 forms.append(u'\u200D{0}\u200D '.format(s))
         if zwjBefore:
             forms.append(u'\u200D{0} '.format(s))
             if zwjAfter:
                 forms.append(u'{0}{0}{0}'.format(s))
         text = u''.join(forms)
         fenced = labelled = True
     elif hasMirrored and self.rtlEnable:
         # Mirrored character present and rtl enabled.
         text = u'{0} LTR: \u202A{0}\u202C RTL: \u202B{0}\u202C'.format(s)
         fenced = labelled = True
     else:
         # Plain output; labelled only for multi-character input.
         text = s
         fenced = False
         labelled = originalCount > 1

     breakAround = fenced and addBreaks
     if breakAround:
         ftml.closeTest()
     if labelled:
         ftml.addToTest(keyUID, text, label=label, comment=comment, rtl=rtl)
     else:
         ftml.addToTest(keyUID, text, comment=comment, rtl=rtl)
     if breakAround:
         ftml.closeTest()