def composeHangulJamoToSyllable(L0, V0, T0=None):
    """Compose a single Hangul syllable character from its jamo.

    L0 -- leading jamo as a unicode char, or None, in which case
          emptyJamo_L is substituted.
    V0 -- vowel jamo as a unicode char (required).
    T0 -- optional trailing jamo as a unicode char; None means no trailing
          jamo.

    Any argument that is a Hangul Compatibility Jamo is first mapped to the
    corresponding positional jamo via hangulCompatibilityJamoToJamo_L/_V/_T.

    Returns the character for the code point
    (LIndex * VCount + VIndex) * TCount + TIndex + SBase.

    Raises Exception if an argument is not a unicode char, or if the
    computed code point is not accepted by isCombinedS.
    """
    argsAreValid = ((L0 is None or isUnicodeChar(L0))
                    and isUnicodeChar(V0)
                    and (T0 is None or isUnicodeChar(T0)))
    if not argsAreValid:
        # %r rather than %s: the offending values are by definition not all
        # unicode chars, and mixing unicode with non-ASCII byte strings in a
        # %s format raises UnicodeDecodeError on Python 2, hiding this error.
        raise Exception('Jamo not unicode characters: %r %r %r' % (L0, V0, T0))

    if L0 is None:
        L0 = emptyJamo_L

    # Normalise compatibility jamo to positional (L/V/T) jamo.
    L = hangulCompatibilityJamoToJamo_L(L0) if isHangulCompatibilityJamo(L0) else L0
    V = hangulCompatibilityJamoToJamo_V(V0) if isHangulCompatibilityJamo(V0) else V0
    if T0 is not None and isHangulCompatibilityJamo(T0):
        T = hangulCompatibilityJamoToJamo_T(T0)
    else:
        T = T0  # stays None when absent -- or do we want empty T pseudo Jamo?

    LIndex = _hangulJamoToIndex_L(L)
    VIndex = _hangulJamoToIndex_V(V)
    # NOTE(review): T may be None here; presumably _hangulJamoToIndex_T maps
    # None to the "no trailing jamo" index -- confirm against its definition.
    TIndex = _hangulJamoToIndex_T(T)

    cpSyll = (LIndex * VCount + VIndex) * TCount + TIndex + SBase
    if not isCombinedS(cpSyll):
        raise Exception("Codepoint produced by composition not Hangul syllable: "
                        + codePointToCodePointString(cpSyll))
    return codePointToCharacter(cpSyll)
def joinHangulChars(hg1LastChar, hg2FirstChar):
    """Join the last char of one Hangul string with the first char of the next.

    hg1LastChar  -- unicode char ending the first string.
    hg2FirstChar -- unicode char starting the second string.

    If both are Hangul syllables they are simply concatenated. If exactly
    one is a syllable, the other char is merged with it through
    joinHangulSyllableAndJamo / joinHangulJamoAndSyllable and that result
    is returned.

    Raises Exception if either argument is not a unicode char, or if
    neither argument is a Hangul syllable.
    """
    if not (isUnicodeChar(hg1LastChar) and isUnicodeChar(hg2FirstChar)):
        # Format with %r instead of '+': the values just failed the unicode
        # check, so concatenating them (e.g. None or an int) would raise
        # TypeError and hide the intended message.
        raise Exception('Not unicode chars: %r %r' % (hg1LastChar, hg2FirstChar))

    firstIsSyllable = isHangulSyllable(hg1LastChar)
    secondIsSyllable = isHangulSyllable(hg2FirstChar)

    if firstIsSyllable and secondIsSyllable:
        return hg1LastChar + hg2FirstChar
    if not firstIsSyllable and not secondIsSyllable:
        raise Exception('Cannot join two non-syllables: ' + hg1LastChar + ' ' + hg2FirstChar)

    # Exactly one side is a syllable; merge the other char into it.
    if firstIsSyllable:
        return joinHangulSyllableAndJamo(hg1LastChar, hg2FirstChar)
    return joinHangulJamoAndSyllable(hg1LastChar, hg2FirstChar)
def test_isUnicodeChar(self):
    """cu.isUnicodeChar accepts exactly one-character unicode objects."""
    cases = (
        (u'a', True),
        (u'ab', False),    # more than one character
        ('a', False),      # plain (non-u) string literal
        (u'', False),      # empty string
        (None, False),
        (u'ㄱ', True),
        (u'가', True),
        (u'가자', False),
        (u'㐀', True),
        (u'𠀀', True),      # U+20000, outside the BMP
    )
    for obj, expected in cases:
        result = cu.isUnicodeChar(obj)
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12); the message identifies the failing table row.
        self.assertEqual(result, expected,
                         'isUnicodeChar(%r) returned %r, expected %r'
                         % (obj, result, expected))
def isHangulChar(obj):
    """Return True if obj is a unicode char that is some kind of Hangul.

    A char qualifies when it is a Hangul syllable, a Hangul Compatibility
    Jamo, or a Hangul Jamo. Anything that is not a single unicode char, or
    that is an ASCII char, yields False.
    """
    # ASCII is the most likely non-Hangul input, so it is rejected before
    # any of the Hangul checks run.
    if not isUnicodeChar(obj) or isAsciiChar(obj):
        return False
    hangulChecks = (isHangulSyllable, isHangulCompatibilityJamo, isHangulJamo)
    return any(check(obj) for check in hangulChecks)
def test_conversions(self):
    """Round-trip chars through code point and 'U+XXXX' string forms."""
    cases = (
        (u'a', 'U+0061'),
        ('a', 'U+0061'),       # plain (non-u) string literal
        (u'ㄱ', 'U+3131'),
        (u'가', 'U+AC00'),
        (u'㐀', 'U+3400'),
        (u'𠀀', 'U+20000'),    # outside the BMP
    )
    # assertEqual is used throughout: assertEquals is a deprecated alias
    # that was removed in Python 3.12.
    for char, expected in cases:
        # char -> code point -> char
        cp = cu.characterToCodepoint(char)  # ord(char)
        self.assertTrue(cu.codePointExists(cp))
        char2 = cu.codePointToCharacter(cp)
        self.assertTrue(cu.isUnicodeChar(char2))
        self.assertEqual(char, char2)

        # code point -> 'U+XXXX' string -> code point
        cpStr = cu.codePointToCodePointString(cp)
        self.assertEqual(cpStr, expected)
        cp2 = cu.codePointStringToCodePoint(cpStr)
        self.assertEqual(cp, cp2)

        # 'U+XXXX' string -> char
        char3 = cu.codePointStringToCharacter(cpStr)
        self.assertEqual(char3, char)
def hangulJamoToTrans(jamo):
    """Return the transliteration string for a single Hangul jamo.

    jamo -- a unicode char, or None. None is treated as an empty trailing
            jamo and gives ''.

    The jamo's code point is classified with isL / isV / isT and the result
    is read from jamoTransList_L / _V / _T at the offset from LBase / VBase /
    TBase respectively.

    Raises Exception if jamo is not a unicode char, or if its code point is
    not accepted by isL, isV or isT.
    """
    if jamo is None:  # Assume empty T
        return ''
    if not isUnicodeChar(jamo):
        # %r instead of unicode(jamo): for a non-ASCII byte string (the most
        # likely wrong input) unicode() raises UnicodeDecodeError on Python 2
        # and would hide this error.
        raise Exception('Not a unicode char: %r' % (jamo,))

    cp = ord(jamo)
    if isL(cp):
        return jamoTransList_L[cp - LBase]
    if isV(cp):
        return jamoTransList_V[cp - VBase]
    if isT(cp):
        return jamoTransList_T[cp - TBase]
    raise Exception('Not a Hangul Jamo: ' + codePointToCodePointString(cp) + ' ' + unicode(jamo))
def test_conversions(self):
    """Check char <-> code point <-> 'U+XXXX' string conversions agree."""
    # The table pairs each input char with its expected 'U+XXXX' string; it
    # includes a plain 'a' literal alongside u'a' and one char beyond the
    # BMP (U+20000).
    for char, expected in ((u'a', 'U+0061'), ('a', 'U+0061'),
                           (u'ㄱ', 'U+3131'), (u'가', 'U+AC00'),
                           (u'㐀', 'U+3400'), (u'𠀀', 'U+20000')):
        cp = cu.characterToCodepoint(char)  # ord(char)
        self.assertTrue(cu.codePointExists(cp))

        # Converting the code point back must give the original char.
        char2 = cu.codePointToCharacter(cp)
        self.assertTrue(cu.isUnicodeChar(char2))
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(char, char2)

        # The string form must match the table and parse back to cp.
        cpStr = cu.codePointToCodePointString(cp)
        self.assertEqual(cpStr, expected)
        cp2 = cu.codePointStringToCodePoint(cpStr)
        self.assertEqual(cp, cp2)

        # The string form must also convert directly to the original char.
        char3 = cu.codePointStringToCharacter(cpStr)
        self.assertEqual(char3, char)
def joinJamoSeparatedHangul(hangul1, jamo, hangul2):
    """Merge a lone jamo into the Hangul text on one side of it.

    hangul1 -- unicode string before the jamo; may be empty or None.
    jamo    -- the separating jamo, a unicode char.
    hangul2 -- unicode string after the jamo; may be empty or None.

    The jamo is first joined to the last char of hangul1 using
    joinHangulSyllableAndJamo. If that fails (or hangul1 is empty) it is
    joined to the first char of hangul2 using joinHangulJamoAndSyllable.
    Returns the full joined string (hangul1 + hangul2 with the jamo merged
    into one of them).

    Raises Exception if an argument has the wrong type, if both strings are
    empty, or if the jamo cannot be joined to either neighbour.
    """
    # Messages use %r so that reporting a bad value cannot itself fail
    # (TypeError on '+' with a non-string, or an implicit ASCII
    # encode/decode error on Python 2).
    if hangul1 and not isUnicodeString(hangul1):
        raise Exception('Not a unicode string: %r' % (hangul1,))
    if hangul2 and not isUnicodeString(hangul2):
        raise Exception('Not a unicode string: %r' % (hangul2,))
    if not isUnicodeChar(jamo):
        # Report the jamo itself (the original reported hangul2 here).
        raise Exception('Not a unicode char: %r' % (jamo,))
    if not hangul1 and not hangul2:
        raise Exception('Adjacent Hangul strings both empty')

    # First try to join the jamo to the preceding syllable.
    if hangul1:
        hg1LastChar = hangul1[-1]
        try:
            joinedSyll = joinHangulSyllableAndJamo(hg1LastChar, jamo)
            joinedHangul = hangul1[:-1] + joinedSyll
            if hangul2:
                joinedHangul += hangul2
            return joinedHangul
        except Exception:
            # Best effort: fall through and try the following string.
            # 'except Exception' (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            pass

    # If it can't be joined to the preceding Hangul, try the following Hangul.
    if hangul2:
        hg2FirstChar = hangul2[0]
        try:
            joinedSyll = joinHangulJamoAndSyllable(jamo, hg2FirstChar)
            joinedHangul = joinedSyll + hangul2[1:]
            if hangul1:
                joinedHangul = hangul1 + joinedHangul
            return joinedHangul
        except Exception:
            pass  # fall through to the final error

    # %r instead of str(): str() on a unicode string containing Hangul
    # raises UnicodeEncodeError on Python 2, which hid this error.
    raise Exception("Can't join hangul sequence: %r %r %r" % (hangul1, jamo, hangul2))
def test_isUnicodeChar(self):
    """Check cu.isUnicodeChar against a table of inputs and expected results."""
    # True is expected only for single-character unicode objects; longer or
    # empty unicode strings, a plain 'a' literal and None expect False.
    for obj, expected in ((u'a', True), (u'ab', False), ('a', False),
                          (u'', False), (None, False), (u'ㄱ', True),
                          (u'가', True), (u'가자', False), (u'㐀', True),
                          (u'𠀀', True)):
        result = cu.isUnicodeChar(obj)
        # assertEqual replaces the deprecated assertEquals alias (removed in
        # Python 3.12); the message says which input failed.
        failureMsg = 'isUnicodeChar(%r): got %r, expected %r' % (obj, result, expected)
        self.assertEqual(result, expected, failureMsg)
def isHangulCompatibilityJamo(obj):
    """Return isC(code point of obj) for a unicode char, otherwise False.

    Objects that are not a single unicode char are never compatibility
    jamo, so they give False without being passed to ord().
    """
    return isC(ord(obj)) if isUnicodeChar(obj) else False
def isHangulJamo_T(obj):
    """Return isT(code point of obj) for a unicode char, otherwise False.

    Objects that are not a single unicode char give False without being
    passed to ord().
    """
    return isT(ord(obj)) if isUnicodeChar(obj) else False
def isHangulSyllable(obj):
    """Return isCombinedS(code point of obj) for a unicode char, otherwise False.

    Objects that are not a single unicode char give False without being
    passed to ord().
    """
    return isCombinedS(ord(obj)) if isUnicodeChar(obj) else False