def test_translate(self):
    """Check that translate() turns the sample PUA string into the expected jamo string."""
    from hypua2jamo import translate

    source = u'나랏\u302e말\u302f미\u302e 中國귁에\u302e 달아\u302e 문와\u302e로 서르 디\u302e 아니\u302e\u302e'  # noqa
    wanted = u'나랏〮말〯ᄊᆞ미〮 中듀ᇰ國귁에〮 달아〮 문ᄍᆞᆼ와〮로 서르 ᄉᆞᄆᆞᆺ디〮 아니〮ᄒᆞᆯᄊᆡ〮'

    self.assertEqual(wanted, translate(source))
def test_translate_composed(self):
    """Check translate() against the HunMinPreface fixture pair.

    The fixture's ``pua_string`` is the input and its
    ``composed_jamo_string`` is the expected output.
    """
    from hypua2jamo import translate

    fixture = Fixtures.HunMinPreface
    # Translate first, then look up the expectation, as the original did.
    actual = translate(fixture.pua_string)
    self.assertEqual(fixture.composed_jamo_string, actual)
def test_conversion(self):
    """Check translate() on the sample PUA string, printing a word-by-word trace.

    For each space-separated word the input (P), expected (E) and actual (R)
    values are printed as UTF-8 bytes plus repr, so a mismatch can be
    located by eye before the final equality assertion fires.
    """
    from hypua2jamo import translate

    pua = u'나랏\u302e말\u302f미\u302e 中國귁에\u302e 달아\u302e 문와\u302e로 서르 디\u302e 아니\u302e\u302e'
    expected = u'나랏〮말〯ᄊᆞ미〮 中듀ᇰ國귁에〮 달아〮 문ᄍᆞᆼ와〮로 서르 ᄉᆞᄆᆞᆺ디〮 아니〮ᄒᆞᆯᄊᆡ〮'
    result = translate(pua)

    # zip() stops at the shortest sequence, so extra words on any side are
    # simply not traced; the assertion below still covers them.
    for words in zip(pua.split(' '), expected.split(' '), result.split(' ')):
        for label, word in zip('PER', words):
            print('%s %s %r' % (label, word.encode('utf-8'), word))

    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(expected, result)
def test_conversion(self):
    """Check translate() on the sample PUA string, printing a word-by-word trace.

    For each space-separated word the input (P), expected (E) and actual (R)
    values are printed followed by their repr, so a mismatch can be located
    by eye before the final equality assertion fires.
    """
    from hypua2jamo import translate

    pua = u"나랏\u302e말\u302f미\u302e 中國귁에\u302e 달아\u302e 문와\u302e로 서르 디\u302e 아니\u302e\u302e"
    expected = u"나랏〮말〯ᄊᆞ미〮 中듀ᇰ國귁에〮 달아〮 문ᄍᆞᆼ와〮로 서르 ᄉᆞᄆᆞᆺ디〮 아니〮ᄒᆞᆯᄊᆡ〮"
    result = translate(pua)

    for p, e, r in zip(pua.split(" "), expected.split(" "), result.split(" ")):
        # A single pre-formatted argument prints the same text whether
        # print is the Python 2 statement or the Python 3 function.
        print("P %s %r" % (p, p))
        print("E %s %r" % (e, e))
        print("R %s %r" % (r, r))

    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(expected, result)
# Replacements for private-use code points used by the Urimalsaem site web font.
_WEBFONT_PUA_MAP = {
    '\uE01D': '\u0254\u0342',  # ɔ͂
    '\uE01E': '\u025B\u0342',  # ɛ͂
    '\uE01F': 'n\u0304',  # n̄
    '\uE020': '𝆑𝆑𝆑',
    '\uE021': '𝆑𝆑',
    '\uE022': '𝆑𝆏',
    '\uE023': '𝆑𝆎',
    '\uE024': '▞',
    '\uE025': '▚',
    '\uE026': '\u3001',  # IDEOGRAPHIC COMMA
    '\uE02C': 'ᅟᅵᇰ',
    '\uE02E': '타ᇦ',
}

# An opening <span class="korean-webfont"> or <equ> tag, then either a
# hexadecimal character reference or any single character, then an optional
# closing tag.
_WEBFONT_RE = re.compile(
    '(?:<span class="korean-webfont">|<equ>)(&#x[0-9A-F]{1,6};|.)(?:</span>|</equ>)?')


def _is_private_use(code_point):
    """Return True if code_point is in one of the three private-use ranges checked here."""
    return (0xE000 <= code_point <= 0xF8FF
            or 0xF0000 <= code_point <= 0xFFFFD
            or 0x100000 <= code_point <= 0x10FFFD)


def decode_string(s):
    """Translate Hanyang PUA text and resolve web-font markup in ``s``.

    The string is first passed through ``hypua2jamo.translate``. Then every
    ``<span class="korean-webfont">`` / ``<equ>`` wrapper around a single
    character (or ``&#x...;`` reference) is removed and its character is:

    * replaced by the mapped text when it is a known private-use code point,
    * replaced by ``<webfont>U+XXXX</webfont>`` when it is an unknown
      private-use code point,
    * kept as is otherwise.

    A reference whose value is not a valid code point (above U+10FFFF) is
    kept as literal text instead of raising ``ValueError``.

    Returns the decoded string.
    """
    # Hanyang PUA
    s = hypua2jamo.translate(s)

    # Urimalsaem site web font.
    # The string is rescanned from the start after each replacement (rather
    # than using a single re.sub pass) so that text uncovered by removing one
    # wrapper can itself be matched on the next iteration.
    while True:
        m = _WEBFONT_RE.search(s)
        if not m:
            break
        ch = m.group(1)
        if ch.startswith('&#x') and ch.endswith(';'):
            try:
                ch = chr(int(ch[3:-1], 16))
            except ValueError:
                # Up to six hex digits can exceed U+10FFFF; keep the
                # reference text unchanged in that case.
                pass
        # len() guard: ch is still the multi-character reference text when
        # the conversion above was rejected.
        if len(ch) == 1 and _is_private_use(ord(ch)):
            if ch in _WEBFONT_PUA_MAP:
                ch = _WEBFONT_PUA_MAP[ch]
            else:
                ch = '<webfont>U+%X</webfont>' % ord(ch)
        a, b = m.span(0)
        s = s[:a] + ch + s[b:]

    # TODO: other PUA
    return s
def test_jc2p_decode(self):
    """Decode every prefix of INPUT_STRING and compare with the expected PUA text.

    Each table row is ``(pua_len, pending, jamo_len)``: decoding the first
    ``jamo_len`` characters of the jamo input with a fresh decoder
    (``final=True``) must give the first ``pua_len`` characters of
    OUTPUT_STRING followed by ``pending``. A non-empty ``pending`` is
    presumably the output for a syllable that is still being composed at
    that cut point.
    """
    def decode_prefix(jamo_string):
        # A new decoder per call, so no state leaks between prefixes.
        return self.make_decoder().decode(jamo_string, final=True)

    pua = self.OUTPUT_STRING
    jamo = self.INPUT_STRING

    steps = (
        (1, u'', 1),  # 나
        (2, u'', 2),  # 랏
        (3, u'', 3),
        (4, u'', 4),
        (5, u'', 5),
        (5, u'\uf7ca', 6),
        (6, u'', 7),
        (7, u'', 8),
        (8, u'', 9),
        (9, u'', 10),
        (10, u'', 11),  # 中
        (10, u'\uf790', 12),
        (10, u'\u1103\u1172', 13),
        (11, u'', 14),
        (12, u'', 15),  # 國
        (13, u'', 16),  # 귁
        (14, u'', 17),  # 에
        (15, u'', 18),  # u302e
        (16, u'', 19),  # u0020
        (17, u'', 20),  # 달
        (18, u'', 21),  # 아
        (19, u'', 22),  # u302e
        (20, u'', 23),  # u0020
        (21, u'', 24),  # 문
        (21, u'\uf7ea', 25),
        (21, u'\uf250', 26),
        (22, u'', 27),
        (23, u'', 28),  # 와
        (24, u'', 29),  # u302e
        (25, u'', 30),  # 로
        (26, u'', 31),  # u0020
        (27, u'', 32),  # 서
        (28, u'', 33),  # 르
        (29, u'', 34),  # u0020
        (29, u'\uf7c2', 35),
        (30, u'', 36),
        (30, u'\uf7a8', 37),
        (30, u'\ue560', 38),
        (31, u'', 39),
        (32, u'', 40),  # 디
        (33, u'', 41),  # u302e
        (34, u'', 42),  # u0020
        (35, u'', 43),  # 아
        (36, u'', 44),  # 니
        (37, u'', 45),  # u302e
        (37, u'\uf7fc', 46),
        (37, u'\uf537', 47),
        (38, u'', 48),
        (38, u'\uf7ca', 49),
        (39, u'', 50),
        (40, u'', 51),  # 302e
    )
    for pua_len, pending, jamo_len in steps:
        self.assertEqual(pua[:pua_len] + pending,
                         decode_prefix(jamo[:jamo_len]))

    # Finally the whole input in one call.
    self.assertEqual(pua[:40], decode_prefix(jamo))
def test_jc2p_decode(self):
    """Decode every prefix of jamo_string through the cffi binding.

    The ucs4 or ucs2 variant of the ``hypua_jc2p_*`` functions is chosen
    from the item size of ``array('u')``. Each table row is
    ``(pua_len, pending, jamo_len)``: translating the first ``jamo_len``
    characters of the jamo input must give the first ``pua_len`` characters
    of ``self.pua_string`` followed by ``pending``. A non-empty ``pending``
    is presumably the output for a syllable that is still being composed at
    that cut point.
    """
    from cffi import FFI
    from hypua2jamo._cffi import lib

    unicode_size = array('u').itemsize
    if unicode_size == 4:
        _translate = lib.hypua_jc2p_ucs4_decode
        _calcsize = lib.hypua_jc2p_ucs4_calcsize
    elif unicode_size == 2:
        _translate = lib.hypua_jc2p_ucs2_decode
        _calcsize = lib.hypua_jc2p_ucs2_calcsize
    else:
        raise AssertionError(unicode_size)

    ffi = FFI()

    def translate(jamo_string):
        # Hand the input buffer to the C function as a void pointer.
        jamo_array = array('u', jamo_string)
        jamo_ptr, jamo_len = jamo_array.buffer_info()
        jamo_ptr = ffi.cast('void *', jamo_ptr)

        # Size the output buffer from the value _calcsize reports.
        pua_size = _calcsize(jamo_ptr, jamo_len)
        pua_array = array('u', u' ' * pua_size)
        pua_ptr = ffi.cast('void *', pua_array.buffer_info()[0])

        pua_len = _translate(jamo_ptr, jamo_len, pua_ptr)
        if pua_size != pua_len:
            # Interpolate the values; passing them as extra Exception
            # arguments would leave the '%r' placeholders unformatted.
            raise Exception('%r != %r' % (pua_size, pua_len))
        return pua_array.tounicode()

    pua = self.pua_string
    jamo = self.jamo_string

    steps = (
        (1, u'', 1),  # 나
        (2, u'', 2),  # 랏
        (3, u'', 3),
        (4, u'', 4),
        (5, u'', 5),
        (5, u'\uf7ca', 6),
        (6, u'', 7),
        (7, u'', 8),
        (8, u'', 9),
        (9, u'', 10),
        (10, u'', 11),  # 中
        (10, u'\uf790', 12),
        (10, u'\u1103\u1172', 13),
        (11, u'', 14),
        (12, u'', 15),  # 國
        (13, u'', 16),  # 귁
        (14, u'', 17),  # 에
        (15, u'', 18),  # u302e
        (16, u'', 19),  # u0020
        (17, u'', 20),  # 달
        (18, u'', 21),  # 아
        (19, u'', 22),  # u302e
        (20, u'', 23),  # u0020
        (21, u'', 24),  # 문
        (21, u'\uf7ea', 25),
        (21, u'\uf250', 26),
        (22, u'', 27),
        (23, u'', 28),  # 와
        (24, u'', 29),  # u302e
        (25, u'', 30),  # 로
        (26, u'', 31),  # u0020
        (27, u'', 32),  # 서
        (28, u'', 33),  # 르
        (29, u'', 34),  # u0020
        (29, u'\uf7c2', 35),
        (30, u'', 36),
        (30, u'\uf7a8', 37),
        (30, u'\ue560', 38),
        (31, u'', 39),
        (32, u'', 40),  # 디
        (33, u'', 41),  # u302e
        (34, u'', 42),  # u0020
        (35, u'', 43),  # 아
        (36, u'', 44),  # 니
        (37, u'', 45),  # u302e
        (37, u'\uf7fc', 46),
        (37, u'\uf537', 47),
        (38, u'', 48),
        (38, u'\uf7ca', 49),
        (39, u'', 50),
        (40, u'', 51),  # 302e
    )
    for pua_len, pending, jamo_len in steps:
        self.assertEqual(pua[:pua_len] + pending,
                         translate(jamo[:jamo_len]))

    # Finally the whole input in one call.
    self.assertEqual(pua[:40], translate(jamo))