コード例 #1
0
# 

import sys
sys.path.append('../..')
from tamil.txt2unicode import auto2unicode

# NOTE: at least one compound character in the `tscii` string below must be
# one of the characters listed in tscii.unique.chars.txt.
#
# For other encodings, look at the files in the encodes_chars directory,
# identify at least one unique compound character, and insert it into the
# input text so that auto2unicode can identify the encoding for you.

tscii = """¾¢ÕÅûÙÅ÷ 
«ÕǢ ¾¢ÕìÌÈû  """

uni = auto2unicode(tscii)

# Write the result as UTF-8 explicitly: the platform default encoding may not
# be able to represent the converted text. The `with` block guarantees the
# file is closed even if the write fails.
with open('unicode-result.txt', 'w', encoding='utf-8') as f:
    f.write(uni)

print("tscii", tscii)
print("unicode", uni)
print("converted unicode stored in 'unicode-result.txt' file\n\n")

# Demo for common compound characters.
# The backslashes are literal characters of the encoded sample text, so they
# are escaped (`\\[`); the resulting string value is the same as with the
# unrecognised escape `\[`, without the invalid-escape warning.
common = """ù£tPùP\\[tI
è£n\\[nwh ùSô ªþ£ ùaô """
print("common", common)
uni = auto2unicode(common)
コード例 #2
0
# -*- coding: utf-8 -*-

# (C) 2014 Arulalan.T <*****@*****.**>
#
# This file is part of 'open-tamil/txt2unicode' package examples
#

import sys
sys.path.append('../..')
from tamil.txt2unicode import tscii2unicode, unicode2tscii, unicode2auto, auto2unicode

# Round-trip demo: unicode -> tscii (directly and via the auto function) and
# back to unicode, then check that both directions agree.
uni_1 = """திருவள்ளுவர் அருளிய திருக்குறள்    """
tscii = unicode2tscii(uni_1)
# The first space-separated word of the tscii text is passed to unicode2auto
# as the sample for the target encoding.
tscii_sample = tscii.split(' ')[0]
tscii_from_auto = unicode2auto(uni_1, tscii_sample)
uni_2 = auto2unicode(tscii_from_auto)

# `with` guarantees the result file is closed even if one of the writes fails.
with open('auto_encode-result.txt', 'w') as f:
    f.write("Initial unicode : " + uni_1 + "\n\n")
    f.write("From unicode to tscii : " + tscii + "\n\n")
    f.write("From unicode to tscii by auto function : " + tscii_from_auto + "\n\n")
    f.write("Again back to unicode from above tscii by auto function: " + uni_2)

assert (uni_1 == uni_2), " Both unicode are 'not' same! "
assert (tscii == tscii_from_auto), " Both tscii are 'not' same! "

# Single-argument print(...) with %-formatting parses as a print statement on
# Python 2 and as a function call on Python 3, and emits the same
# "label value" text as the former `print label, value` statements.
print("unicode original input %s" % uni_1)
print("from unicode2tscii %s" % tscii)
print("from unicode2auto %s" % tscii_from_auto)
print("back to unicode %s" % uni_2)
コード例 #3
0
# This file is part of 'open-tamil/txt2unicode' package examples
# 

import sys
sys.path.append('../..')
from tamil.txt2unicode import tscii2unicode, unicode2tscii, unicode2auto, auto2unicode

# Round-trip demo: unicode -> tscii (directly and via the auto function) and
# back to unicode, then check that both directions agree.
uni_1 = u"""திருவள்ளுவர் அருளிய திருக்குறள்    """
tscii = unicode2tscii(uni_1)
if not tscii:
    # FIXME: known failure.
    # Raised explicitly (rather than `assert False`) so the guard is not
    # stripped when Python runs with -O.
    raise AssertionError("unicode2tscii failed. You need to debug")
print(tscii, len(tscii))
# The first space-separated word of the tscii text is passed to unicode2auto
# as the sample for the target encoding.
tscii_sample = tscii.split(' ')[0]
tscii_from_auto = unicode2auto(uni_1, tscii_sample)
uni_2 = auto2unicode(tscii_from_auto)

# Write as UTF-8 explicitly: the platform default encoding may not be able to
# represent the Tamil text. `with` closes the file even if a write fails.
with open(u'auto_encode-result.txt', 'w', encoding='utf-8') as f:
    f.write("Initial unicode : " + uni_1 + "\n\n")
    f.write("From unicode to tscii : " + tscii + "\n\n")
    f.write("From unicode to tscii by auto function : " + tscii_from_auto + "\n\n")
    f.write("Again back to unicode from above tscii by auto function: " + uni_2)

assert (uni_1 == uni_2), " Both unicode are 'not' same! "
assert (tscii == tscii_from_auto), " Both tscii are 'not' same! "

print("unicode original input", uni_1)
print("from unicode2tscii", tscii)
print("from unicode2auto", tscii_from_auto)
print("back to unicode", uni_2)
コード例 #4
0
#

import sys
sys.path.append('../..')
from tamil.txt2unicode import auto2unicode

# NOTE: at least one compound character in the `tscii` string below must be
# one of the characters listed in tscii.unique.chars.txt.
#
# For other encodings, look at the files in the encodes_chars directory,
# identify at least one unique compound character, and insert it into the
# input text so that auto2unicode can identify the encoding for you.

tscii = """¾¢ÕÅûÙÅ÷ 
«ÕǢ ¾¢ÕìÌÈû  """

uni = auto2unicode(tscii)

# `with` guarantees the result file is closed even if the write fails.
with open('unicode-result.txt', 'w') as f:
    f.write(uni)

# Single-argument print(...) with %-formatting parses as a print statement on
# Python 2 and as a function call on Python 3, and emits the same
# "label value" text as the former `print label, value` statements.
print("tscii %s" % tscii)
print("unicode %s" % uni)
print("converted unicode stored in 'unicode-result.txt' file\n\n")

# Demo for common compound characters.
# The backslashes are literal characters of the encoded sample text, so they
# are escaped (`\\[`); the resulting string value is the same as with the
# unrecognised escape `\[`.
common = """ù£tPùP\\[tI
è£n\\[nwh ùSô ªþ£ ùaô """
print("common %s" % common)
uni = auto2unicode(common)