def test_codePointExists(self):
    """Check that codePointExists agrees with codePointToCharacter.

    For every (code point, expected) pair, ``cu.codePointExists`` must
    return the expected flag.  ``cu.codePointToCharacter`` must then succeed
    for code points expected to exist and raise for the others.
    """
    cases = (
        (0x3130, False),
        (0x3131, True),
        (0x4DB5, True),
        (0x4DB6, False),
    )
    for cp, expected in cases:
        result = cu.codePointExists(cp)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(result, expected)
        if expected:
            # Conversion must not raise for an existing code point.
            cu.codePointToCharacter(cp)
        else:
            self.assertRaises(Exception, cu.codePointToCharacter, cp)
def test_codePointExist_nonBMP(self):
    """Check codePointExists / codePointToCharacter above the BMP.

    Both code points are currently expected to be reported as existing.
    """
    # XXX: this succeeds with surrogate pairs, but it misses the fact that
    # the second code point does not exist.
    cases = (
        (0x20000, True),
        (0x2A6D7, True),
    )
    for cp, expected in cases:
        result = cu.codePointExists(cp)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(result, expected)
        if expected:
            # Conversion must not raise for an existing code point.
            cu.codePointToCharacter(cp)
        else:
            self.assertRaises(Exception, cu.codePointToCharacter, cp)
def test_codePointExists(self):
    """Verify existence checks and character conversion stay in step.

    Each pair gives a code point and whether ``cu.codePointExists`` should
    report it as existing.  Existing code points must convert through
    ``cu.codePointToCharacter`` without error; the others must raise.
    """
    cases = (
        (0x3130, False),
        (0x3131, True),
        (0x4DB5, True),
        (0x4DB6, False),
    )
    for cp, expected in cases:
        result = cu.codePointExists(cp)
        # Use assertEqual: the assertEquals alias is deprecated and no longer
        # exists in Python 3.12+.
        self.assertEqual(result, expected)
        if expected:
            # A failure here surfaces as an exception from the call itself.
            cu.codePointToCharacter(cp)
        else:
            self.assertRaises(Exception, cu.codePointToCharacter, cp)
def test_codePointExist_nonBMP(self):
    """Verify existence checks and conversion for code points beyond the BMP.

    Both listed code points are currently expected to be reported as
    existing.
    """
    # XXX: this succeeds with surrogate pairs, but it misses the fact that
    # the second code point does not exist.
    cases = (
        (0x20000, True),
        (0x2A6D7, True),
    )
    for cp, expected in cases:
        result = cu.codePointExists(cp)
        # Use assertEqual: the assertEquals alias is deprecated and no longer
        # exists in Python 3.12+.
        self.assertEqual(result, expected)
        if expected:
            # A failure here surfaces as an exception from the call itself.
            cu.codePointToCharacter(cp)
        else:
            self.assertRaises(Exception, cu.codePointToCharacter, cp)
def hangulCompatibilityJamoToJamo_LVT(obj):
    """Map a Hangul compatibility jamo to its (L, V, T) jamo counterparts.

    Returns a triple ``(L, V, T)`` of characters.  Each slot is None when the
    jamo data holds no code point for that position; at most two slots can be
    non-None (in some cases L and T).  Returns None instead of a triple when
    ``getJamoDataForCodePoint`` yields no data for the character.

    Raises:
        Exception: if ``obj`` is not a Hangul compatibility jamo.
    """
    if not isHangulCompatibilityJamo(obj):
        # The message previously said "jamo V", which was misleading here:
        # this function handles L, V and T alike.
        raise Exception('Not compatibility jamo: ' + str(obj))

    data = getJamoDataForCodePoint(ord(obj))
    if not data:
        return None

    cpL, cpV, cpT = data[idxL], data[idxV], data[idxT]
    # A falsy code point means "no jamo in this position".
    L = codePointToCharacter(cpL) if cpL else None
    V = codePointToCharacter(cpV) if cpV else None
    T = codePointToCharacter(cpT) if cpT else None
    return (L, V, T)
def composeHangulJamoToSyllable(L0, V0, T0=None):
    """Compose leading, vowel and optional trailing jamo into one syllable.

    Args:
        L0: leading consonant jamo, or None to use ``emptyJamo_L``.
        V0: vowel jamo (required).
        T0: trailing consonant jamo, or None for a syllable without a final.

    Any argument may be a Hangul compatibility jamo; it is converted with
    the matching ``hangulCompatibilityJamoToJamo_*`` helper first.

    Returns:
        The composed syllable character.

    Raises:
        Exception: if an argument is not a unicode character, or if the
            computed code point is not a Hangul syllable.
    """
    args_ok = (
        (L0 is None or isUnicodeChar(L0))
        and isUnicodeChar(V0)
        and (T0 is None or isUnicodeChar(T0))
    )
    if not args_ok:
        # %s already renders None as 'None', so T0 needs no special casing.
        raise Exception('Jamo not unicode characters: %s %s %s' % (L0, V0, T0))

    if L0 is None:
        L0 = emptyJamo_L

    L = hangulCompatibilityJamoToJamo_L(L0) if isHangulCompatibilityJamo(L0) else L0
    V = hangulCompatibilityJamoToJamo_V(V0) if isHangulCompatibilityJamo(V0) else V0
    if T0 is not None and isHangulCompatibilityJamo(T0):
        T = hangulCompatibilityJamoToJamo_T(T0)
    else:
        T = T0  # stays None when no final was given; or do we want empty T pseudo jamo?

    LIndex = _hangulJamoToIndex_L(L)
    VIndex = _hangulJamoToIndex_V(V)
    TIndex = _hangulJamoToIndex_T(T)

    # Arithmetic composition: offset from SBase by the combined L/V/T indices.
    cpSyll = (LIndex * VCount + VIndex) * TCount + TIndex + SBase
    if not isCombinedS(cpSyll):
        raise Exception("Codepoint produced by composition not Hangul syllable: " + codePointToCodePointString(cpSyll))
    return codePointToCharacter(cpSyll)
def _hangulJamoToCompatibility_X(obj):
    """Return the compatibility jamo for ``obj``, or None without jamo data.

    Internal helper: the argument is not validated.
    """
    entry = getJamoDataForCodePoint(ord(obj))
    if entry:
        return codePointToCharacter(entry[idxCompat])
    return None
def test_conversions(self):
    """Round-trip characters through the code point conversion helpers.

    For each (character, 'U+XXXX' string) pair this checks:
    character -> code point -> character, code point -> code point string,
    and code point string -> code point / character.
    """
    cases = (
        (u'a', 'U+0061'),
        ('a', 'U+0061'),
        (u'ㄱ', 'U+3131'),
        (u'가', 'U+AC00'),
        (u'㐀', 'U+3400'),
        (u'𠀀', 'U+20000'),
    )
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    for char, expected in cases:
        cp = cu.characterToCodepoint(char)  # ord(char)
        self.assertTrue(cu.codePointExists(cp))

        char2 = cu.codePointToCharacter(cp)
        self.assertTrue(cu.isUnicodeChar(char2))
        self.assertEqual(char, char2)

        cpStr = cu.codePointToCodePointString(cp)
        self.assertEqual(cpStr, expected)

        cp2 = cu.codePointStringToCodePoint(cpStr)
        self.assertEqual(cp, cp2)

        char3 = cu.codePointStringToCharacter(cpStr)
        self.assertEqual(char3, char)
def test_conversions(self):
    """Exercise every conversion between character, code point and string.

    Each case pairs a character with its expected 'U+XXXX' code point
    string.  The character is converted to a code point and back, then to
    the code point string, and the string is converted back to both forms.
    """
    cases = (
        (u'a', 'U+0061'),
        ('a', 'U+0061'),
        (u'ㄱ', 'U+3131'),
        (u'가', 'U+AC00'),
        (u'㐀', 'U+3400'),
        (u'𠀀', 'U+20000'),
    )
    # Use assertEqual: the assertEquals alias is deprecated and no longer
    # exists in Python 3.12+.
    for char, expected in cases:
        cp = cu.characterToCodepoint(char)  # ord(char)
        self.assertTrue(cu.codePointExists(cp))

        char2 = cu.codePointToCharacter(cp)
        self.assertTrue(cu.isUnicodeChar(char2))
        self.assertEqual(char, char2)

        cpStr = cu.codePointToCodePointString(cp)
        self.assertEqual(cpStr, expected)

        cp2 = cu.codePointStringToCodePoint(cpStr)
        self.assertEqual(cp, cp2)

        char3 = cu.codePointStringToCharacter(cpStr)
        self.assertEqual(char3, char)
def isHangulJamoOrCompatibilityJamo_T(obj):
    """Return True if ``ensureHangulJamo_T`` accepts ``obj``, else False."""
    try:
        ensureHangulJamo_T(obj)
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed and reported as "not a jamo".
        return False
    return True


# ==================================================================
#
# Empty L and T jamo predicates
#
# ==================================================================

emptyJamoCodePoint_L = 0x110B  # syllable-initial 'ᄋ'
emptyJamo_L = codePointToCharacter(emptyJamoCodePoint_L)


def isEmptyHangulJamo_L(jamo):
    """Return True if ``jamo`` is the syllable-initial 'ᄋ' placeholder."""
    return ord(jamo) == emptyJamoCodePoint_L  # syllable-initial 'ᄋ'


emptyJamoCodePoint_T = TBase  # 0x11A7 # not a jamo, rather one before the first jamo
emptyJamo_T = None  # may not work everywhere


def isEmptyHangulJamo_T(jamo):
    """Return True if ``jamo`` is None or the code point just before the first T jamo."""
    # Identity test for None; the ord() check only runs for real characters.
    return jamo is None or ord(jamo) == emptyJamoCodePoint_T  # not a jamo, rather one before the first jamo


# ==================================================================
#
# Convert combining jamo to compatibility jamo
#