#
# Example: convert TSCII-encoded Tamil text to unicode with auto2unicode,
# store the result in 'unicode-result.txt' and echo it to the console.
import sys
sys.path.append('../..')
from tamil.txt2unicode import auto2unicode

# Note: at least one compound character in the `tscii` string below must
# fall under the tscii.unique.chars.txt characters.
# For other encodings, look at the files in the encodes_chars directory,
# identify at least one unique compound character and insert it in the input
# text, so that auto2unicode can identify the encoding for you!
tscii = """¾¢ÕÅûÙÅ÷ «ÕǢ ¾¢ÕìÌÈû """

uni = auto2unicode(tscii)

# The context manager closes the file even if write() raises.
with open('unicode-result.txt', 'w') as f:
    f.write(uni)

print("tscii", tscii)
print("unicode", uni)
print("converted unicode stored in 'unicode-result.txt' file\n\n")

# demo for common compound characters
# The backslashes are escaped explicitly: '\[' is not a valid escape sequence,
# and '\\[' yields the same two characters without a syntax warning.
common = """ù£tPùP\\[tI è£n\\[nwh ùSô ªþ£ ùaô """
print("common", common)
uni = auto2unicode(common)
# -*- coding: utf-8 -*- # (C) 2014 Arulalan.T <*****@*****.**> # # This file is part of 'open-tamil/txt2unicode' package examples # import sys sys.path.append('../..') from tamil.txt2unicode import tscii2unicode, unicode2tscii, unicode2auto, auto2unicode uni_1 = """திருவள்ளுவர் அருளிய திருக்குறள் """ tscii = unicode2tscii(uni_1) tscii_sample = tscii.split(' ')[0] tscii_from_auto = unicode2auto(uni_1, tscii_sample) uni_2 = auto2unicode(tscii_from_auto) f = open('auto_encode-result.txt', 'w') f.write("Initial unicode : " + uni_1 + "\n\n") f.write("From unicode to tscii : " + tscii + "\n\n") f.write("From unicode to tscii by auto function : " + tscii_from_auto + "\n\n") f.write("Again back to unicode from above tscii by auto function: " + uni_2) f.close() assert (uni_1 == uni_2), " Both unicode are 'not' same! " assert (tscii == tscii_from_auto), " Both tscii are 'not' same! " print "unicode original input", uni_1 print "from unicode2tscii", tscii print "from unicode2auto", tscii_from_auto print "back to unicode", uni_2
# This file is part of 'open-tamil/txt2unicode' package examples
#
# Example: round-trip a Tamil unicode string through TSCII and back, write
# every intermediate form to 'auto_encode-result.txt' and check that both
# directions agree.
import sys
sys.path.append('../..')
from tamil.txt2unicode import tscii2unicode, unicode2tscii, unicode2auto, auto2unicode

uni_1 = u"""திருவள்ளுவர் அருளிய திருக்குறள் """

tscii = unicode2tscii(uni_1)

if not tscii:
    # FIXME: known failure.
    # Raised explicitly (instead of `assert False`) so the guard is not
    # stripped when Python runs with -O.
    raise AssertionError("unicode2tscii failed. You need to debug")
print(tscii, len(tscii))

# The first space-separated word of the TSCII text is handed to unicode2auto
# as its second argument (presumably a sample from which the target encoding
# is detected -- confirm against unicode2auto).
tscii_sample = tscii.split(' ')[0]
tscii_from_auto = unicode2auto(uni_1, tscii_sample)
uni_2 = auto2unicode(tscii_from_auto)

# The context manager closes the file even if one of the writes raises.
with open(u'auto_encode-result.txt', 'w') as f:
    f.write("Initial unicode : " + uni_1 + "\n\n")
    f.write("From unicode to tscii : " + tscii + "\n\n")
    f.write("From unicode to tscii by auto function : " + tscii_from_auto + "\n\n")
    f.write("Again back to unicode from above tscii by auto function: " + uni_2)

# The round trip must reproduce the input, and both conversion paths must
# produce the same TSCII text.
assert (uni_1 == uni_2), " Both unicode are 'not' same! "
assert (tscii == tscii_from_auto), " Both tscii are 'not' same! "

print("unicode original input", uni_1)
print("from unicode2tscii", tscii)
print("from unicode2auto", tscii_from_auto)
print("back to unicode", uni_2)
# import sys sys.path.append('../..') from tamil.txt2unicode import auto2unicode # note that atleast one compound characters in below tscii variable string # falls under tscii.unique.chars.txt characters. # for other encodes user need to look at encodes_chars directory files and # identify atleast one unique compound characters & incert in input text. # so that auto2unicode can identiy encode for you ! tscii = """¾¢ÕÅûÙÅ÷ «ÕǢ ¾¢ÕìÌÈû """ uni = auto2unicode(tscii) f = open('unicode-result.txt', 'w') f.write(uni) f.close() print "tscii", tscii print "unicode", uni print "converted unicode stored in 'unicode-result.txt' file\n\n" # demo for common compound characters common = """ù£tPùP\[tI è£n\[nwh ùSô ªþ£ ùaô """ print "common", common uni = auto2unicode(common)