def get(self):
    """Transliterate the `input` request parameter and write the result as JSON.

    The transliterator is always built from
    translit_burmese_rules.TRANSLIT_MY_OKELL_JW; the `rules` request
    parameter is decoded and logged but not otherwise used.

    The JSON object written to the response has the keys `outText`,
    `message`, `error` and `summary`. `error` is non-empty when creating the
    transliterator, or calling it, raised an exception.
    """
    # Get parameters
    logging.info('DoTranslitHandler')
    rules = self.request.get('rules', '').decode('unicode-escape')
    text = self.request.get('input', 'No input')
    text = urllib.unquote(text.encode('utf-8'))
    logging.info('DoTranslitHandler rules = %s' % rules)

    error = ''  # Set if there's a problem.
    out_text = "not transliterated"

    # Create the transliterator inside the try so that a failure is reported
    # in the JSON reply instead of escaping from the handler.
    trans = None
    try:
        trans = transliterate.Transliterate(
            translit_burmese_rules.TRANSLIT_MY_OKELL_JW, debug=True)
    except Exception as e:
        error = '!!!!! Creating transliterator Error e = %s.' % (e)
        logging.error(error)
        out_text = '~~~~~~~~~ Creation Error: %s' % e

    # !!! FINISH THIS
    if trans is not None:
        try:
            out_text = trans.transliterate(text)
        except Exception as e:
            # Surface the failure to the caller as well as to the log.
            error = '!! Calling transliterate Error e = %s. trans=%s' % (e, trans)
            logging.error(error)
    logging.info('outText = %s' % (out_text))

    message = 'MESSAGE #'  # TODO: Fill in with error or success message.

    summary_text = "No summary available"
    if trans:
        try:
            summary = trans.getSummary()
            summary_text = ','.join(summary['shortcuts'].values())
        except AttributeError:
            # Keep the default text when the summary cannot be read.
            summary_text = "No summary available"

    result = {
        'outText': out_text,
        'message': message,
        'error': error,
        'summary': summary_text,
    }
    self.response.out.write(json.dumps(result))
def get(self):
    """Write a placeholder transliteration reply for the configured language.

    Reads `langInfo` from the application config and, when it names a
    transliterator module, builds a Transliterate from that module's
    TRANS_LIT_RULES and DESCRIPTION. The JSON reply carries the keys
    `outText`, `message`, `error`, `language`, `langTag`, `showTools` and
    `summary`; `summary` is null when no transliterator is configured.
    """
    # TODO: Get the text values
    # Call transliterator
    # Return JSON structure with values.
    langInfo = self.app.config.get('langInfo')

    # Bound up front so the summary lookup below cannot hit an unbound local
    # when langInfo has no transliterator.
    transliterator = None
    if langInfo.transliterator:
        transliterator = transliterate.Transliterate(
            langInfo.transliterator.TRANS_LIT_RULES,
            langInfo.transliterator.DESCRIPTION
        )

    # NOTE(review): placeholder output. '\u' followed by five hex digits does
    # not denote a single code point in a Python string literal -- confirm
    # what text is intended here.
    outText = '\u11103\u11101\u11103'
    message = 'TBD'
    error = ''

    if transliterator is not None:
        summary = transliterator.getSummary()
    else:
        summary = None

    result = {
        'outText': outText,
        'message': message,
        'error': error,
        # Language and LanguageCode are not defined in this function;
        # presumably module-level constants -- verify.
        'language': Language,
        'langTag': LanguageCode,
        'showTools': self.request.get('tools', None),
        'summary': summary,
    }
    self.response.out.write(json.dumps(result))
def get(self):
    """Transliterate `input` with the rules supplied in the request.

    `rules` is unicode-escape decoded; `input` is URL-unquoted twice and then
    unicode-escape decoded, with the intermediate values logged. The JSON
    reply has the keys `outText`, `message`, `error` and `summary`.
    """
    # Get parameters
    rules = self.request.get('rules', '').decode('unicode-escape')
    text = self.request.get('input', '')
    logging.info('INPUT = %s' % text)

    # The value is unquoted two times, logging after each pass.
    for _ in range(2):
        text = urllib.unquote(text)
        logging.info('INPUT = %s' % text)

    text = text.decode('unicode-escape')
    logging.info('INPUT = %s' % text)
    logging.info('RULES = %s' % rules)
    logging.info('INPUT = %s' % text)

    # NOTE(review): the second positional argument is True; whether that
    # slot is a description or a debug flag is not visible here -- confirm.
    trans = transliterate.Transliterate(rules, True)
    outText = trans.transliterate(text)

    for label, value in (('rules', rules), ('input', text),
                         ('trans', trans), ('outText', outText)):
        logging.info('%s = %%s' % label % value)

    reply = {
        'outText': outText,
        'message': '',  # TODO: Fill in with error or success message.
        'error': '',
        'summary': trans.getSummary(),
    }
    self.response.out.write(json.dumps(reply))
def get(self):
    """Convert the `text` request parameter and write a JSON report.

    The text is URL-unquoted, run through every pair in
    transrule_my_wwburn.Substitutions, and then transliterated. The reply is
    sent with Content-Type application/json. When the substituted text is
    empty the reply carries `errmsg: 'Null input'` and no conversion fields.
    """
    global my_wwburn_converter_Unicode
    logging.info('ConvertHandler get received. %s' % self.request)

    raw_text = unicode(self.request.get('text'))
    input_type = self.request.get('type', 'Z')
    strip_spaces = self.request.get('strip_spaces', None)  # read but unused
    debug = self.request.get('debug', None)
    text = urllib.unquote(raw_text)  # .decode('utf-8')
    noreturn = self.request.get('noreturn', None)
    msg = ''

    # THE ACTUAL CONVERSION.
    # The converter is rebuilt on every request.
    ## TODO: Fix later. not my_wwburn_converter_Unicode:
    my_wwburn_converter_Unicode = transliterate.Transliterate(
        transrule_my_wwburn.MY_WWBURN_UNICODE_TRANSLITERATE,
        transrule_my_wwburn.UNICODE_DESCRIPTION)

    # Apply each substitution to the result of the previous one.
    for rep in transrule_my_wwburn.Substitutions:
        text = text.replace(rep[0], rep[1])
    result = my_wwburn_converter_Unicode.transliterate(text, debug)

    self.response.headers['Content-Type'] = 'application/json'
    if not text:
        obj = {
            'input': text,
            'input_type': input_type,
            'msg': msg,
            'noreturn': noreturn,
            'errmsg': 'Null input'
        }
    else:
        # The (substituted) input is echoed back unless `noreturn` is set.
        obj = {
            'input': '' if noreturn else text,
            'input_type': input_type,
            'msg': msg,
            'converted': result,
            'detector_description': transrule_my_wwburn.UNICODE_DESCRIPTION,
            'noreturn': noreturn,
            'inputSize': len(text),
            'resultSize': len(result),
            'errmsg': None
        }
    self.response.out.write(json.dumps(obj))
def testContext():
    """Run the context-rule samples and print each result.

    Each sample is transliterated with a Transliterate built from the rules
    below; the input, expected and actual strings are printed, and a context
    diff is written to stdout when they differ.
    """
    print('*** Context Rules')
    # One rule per line.
    context_rules = """{γ } γ > n;
{γ } κ > n;
{γ } ξ > n;
{γ } χ > n;
γ > g;
γ > g;
κ > k;
ξ > x;
χ > ch;
"""
    samples = ['γγ', 'γκ', 'γξ', 'γχ', 'γ']
    wanted = ['ng', 'nk', 'nx', 'nch', 'g']

    translit = transliterate.Transliterate(context_rules, 'Testing new ')
    for item_index, (inline, want) in enumerate(zip(samples, wanted)):
        outline = translit.transliterate(inline)
        print('Test %d:\n >>%s<<\n %4d >>%s<<\n %4d >>%s<<' %
              (item_index, inline, len(want), want, len(outline), outline))
        if outline == want:
            continue
        print('Difference in expected data 1')
        diffs = difflib.context_diff(want, outline,
                                     fromfile='before.py', tofile='after.py')
        sys.stdout.writelines(diffs)
def testBasic():
    """Run the basic rule sample and print the result.

    The sample is transliterated with a Transliterate built from the rules
    below; the input, expected and actual strings are printed, and a context
    diff is written to stdout when they differ.
    """
    samples = ['cxyzd']
    wanted = ['cyzqd']
    # One rule per line.
    raw_rules = """\
(x)(yz) > $2 | $1;
x > q;
a { b } c > B;
{ b } d > BD;
b } d > BD-;
a { b > AB;
a { b } > AB-;
"""
    translit = transliterate.Transliterate(raw_rules, 'Testing new ')
    for item_index, (inline, want) in enumerate(zip(samples, wanted)):
        outline = translit.transliterate(inline)
        print('Test %d:\n >>%s<<\n %4d >>%s<<\n %4d >>%s<<' %
              (item_index, inline, len(want), want, len(outline), outline))
        if outline == want:
            continue
        print('Difference in expected data 1')
        diffs = difflib.context_diff(want, outline,
                                     fromfile='before.py', tofile='after.py')
        sys.stdout.writelines(diffs)
class myConvert():
    """Converts Zawgyi-encoded text to Unicode with a shared transliterator."""

    description = 'Converts Zawgyi font encoding to Unicode'
    # Built once at class-definition time and shared by all instances.
    converter = transliterate.Transliterate(
        translit_zawgyi.ZAWGYI_UNICODE_TRANSLITERATE,
        translit_zawgyi.ZAWGYI_description)
    oldFonts = ['Zawgyi', 'ZawgyiOne', 'Zawgyi2008']  # Maybe some others?

    def __init__(self, newFont=None):
        """Remember the target font name; 'NotoSansMyanmar' when none is given."""
        if newFont:
            self.unicodeFont = newFont
        else:
            self.unicodeFont = 'NotoSansMyanmar'

    def toLower(self, inText):
        """Return inText unchanged."""
        return inText

    def convertText(self, textIn, fontTextInfo=None):
        """Convert textIn, optionally piece by piece using fontTextInfo.

        This single method replaces two same-named definitions: Python keeps
        only the last `def` of a name, so the former one-argument form could
        never be called. The default makes `convertText(textIn)` work.

        Args:
          textIn: text to convert; anything that is not a string is returned
            unchanged.
          fontTextInfo: optional sequence of items where item[0] is a piece
            of text and item[1] is a sequence of objects with a `tag` string.
            When given, each piece is converted separately and the results
            are joined; textIn itself is then not converted.

        Returns:
          The converted text.
        """
        if not isinstance(textIn, basestring):
            return textIn

        if not fontTextInfo:
            return self.convertString(textIn, None, convertToLower=None)

        # Take the data from the fontTextInfo field.
        # TODO: Apply style as needed.
        convertList = []
        for item in fontTextInfo:
            # `debug` is not defined in this class; presumably a module-level
            # flag -- verify.
            if debug:
                print('++ text = %s' % item[0])
            tags = []
            for fmt in item[1]:
                # Keep the part of the tag after the first '}' (the whole tag
                # when there is none, since find() then returns -1).
                loc = fmt.tag.find('}')
                tags.append(fmt.tag[loc + 1:])
                if debug:
                    print(' %s ' % fmt.tag[loc + 1:])
            # Convert this one, and return the result
            convertList.append(self.convertString(item[0], tags))

        converted = u''.join(convertList)
        print('***** CONVERT LIST = %s' % converted.encode('utf-8'))
        return converted

    def convertString(self, textIn, fontInfo, convertToLower=False):
        """Transliterate textIn with the shared converter.

        fontInfo and convertToLower are accepted but not used.
        """
        return self.converter.transliterate(textIn)
def createTest():
    """Build a transliterator from a small rule sample and print its output."""
    # One rule per line; the line starting with '#' is a disabled rule.
    rules_text = u"""
$consonant = [A-Z];
A > E;
#($consonant) > X;
(x)(yz) > $2 | $1;
N > Y|es;
es > y\=;
"""
    sample_text = """A CY cy H 3AH. က\n Now is the time for All conscious people coming to califHorniA. xyz """

    # NOTE(review): the second positional argument is True; whether that
    # slot is a description or a debug flag is not visible here -- confirm.
    translit = transliterate.Transliterate(rules_text, True)
    print('createText: output = %s' % (translit.transliterate(sample_text)))
def get(self):
    """Write a placeholder Chakma transliteration reply as JSON.

    Builds a Transliterate from transrule_ccp and reports its summary along
    with fixed placeholder values and the `tools` request parameter.
    """
    # TODO: Get the text values
    # Call transliterator
    # Return JSON structure with values.
    chakma_translit = transliterate.Transliterate(
        transrule_ccp.TRANS_LIT_RULES, transrule_ccp.DESCRIPTION)

    payload = {
        'outText': '\u11103\u11101\u11103',
        'message': 'TBD',
        'error': '',
        'language': 'Chakma',
        'langTag': 'ccp',
        'showTools': self.request.get('tools', None),
        'summary': chakma_translit.getSummary(),
    }
    self.response.out.write(json.dumps(payload))
def main(argv=None):
    """Run the TestData checks against a transliterator and print the results.

    Args:
      argv: argument list laid out like sys.argv (argv[0] is the program
        name). Defaults to sys.argv when omitted. If argv[1] ends in '.xml'
        the transliterator comes from testXmlInput(argv[1]); otherwise, or
        when that returns nothing, the built-in
        translit_burmese_rules.TRANSLIT_MY_OKELL_JW rules are used.
    """
    import sys  # Local import: needed only for the argv default.

    # The declared default is None, which len() cannot handle.
    if argv is None:
        argv = sys.argv

    # The transliterator object
    trans = None

    # TODO: Test XML input and parsing
    if len(argv) > 1 and os.path.splitext(argv[1])[1] == '.xml':
        trans = testXmlInput(argv[1])

    # Get the transliterator data
    if not trans:
        trans = transliterate.Transliterate(
            translit_burmese_rules.TRANSLIT_MY_OKELL_JW,
            'description',
            debug=True)
    if not trans:
        print('Cannot create transliterator')
        return

    #trans.printSummary()
    test_data = TestData(trans)
    # Print the first element of each result's first entry and the result's
    # last entry.
    for out in test_data.test():
        print('%s → %s' % (out[0][0].encode('utf-8'), out[-1].encode('utf-8')))
        # print('%s' % (out[-1]))  # Just the transliterated result

    # Start testing the transliterator with Burmese text.
    return
def main(args):
    """Build the wwburn transliterator and hand it to test1.

    `args` is accepted but not used. test1 receives the transliterator and
    transrule_my_wwburn.Substitutions.
    """
    rules = transrule_my_wwburn.MY_WWBURN_UNICODE_TRANSLITERATE
    converter = transliterate.Transliterate(rules)
    test1(converter, transrule_my_wwburn.Substitutions)
def main(argv=None):
    """Command-line driver for the transliterator tests.

    Args:
      argv: argument list laid out like sys.argv (argv[0] is the program
        name). Defaults to sys.argv when omitted.

    With no arguments, createTest() is run. With an '.xml' path named
    'Latin_Adlam.xml' or 'Myanmar-Latin.xml', the matching XML test is run.
    Otherwise the arguments are read as <inType> <inFile> and the file is
    passed to transliterateFile with the transliterator for that type.
    """
    import sys  # Local import: needed only for the argv default.

    # The declared default is None, which len() cannot handle.
    if argv is None:
        argv = sys.argv

    # TODO: Test XML input and parsing
    if len(argv) > 1:
        base_file_name = os.path.basename(argv[1])
        if os.path.splitext(argv[1])[1] == '.xml':
            xml_transliterator = testXmlInput(argv[1])

            if base_file_name == 'Latin_Adlam.xml':
                testAdlam(xml_transliterator)
                return
            if base_file_name == 'Myanmar-Latin.xml':
                testMyanmarLatin(xml_transliterator)
                return
    else:
        # Basic test
        #testBasic()
        createTest()
        # testContext()
        return

    # Old tests
    if len(argv) > 1:
        print(argv)
        if len(argv) < 3:
            # Without this check argv[2] below raises IndexError.
            print('Expected arguments: <inType> <inFile>')
            return
        inType = argv[1]
        inFile = argv[2]
        print(inType, inFile)
        # NOTE(review): these branches use the bare name Transliterate while
        # the code further down uses transliterate.Transliterate -- confirm
        # both names are in scope.
        if inType == 'knu':
            trans = Transliterate(translit_knu.TRANS_LIT_RULES)
            encoding = 'knu'
        elif inType == 'zawgyi':
            trans = Transliterate(translit_zawgyi.TRANS_LIT_RULES)
            encoding = 'zawgyi'
        elif inType == 'uni_mon':
            trans = Transliterate(translit_zawgyi.UNIMON_UNICODE_TRANSLITERATE)
            encoding = 'uni_mon'
        elif inType == 'shanthai':
            trans = None  # Transliterate(translit_zawgyi.SHANTHAI_TRANSLITERATE)
            encoding = 'shanthai'
        else:
            # Previously fell through with trans/encoding unbound.
            print('Unknown input type: %s' % inType)
            return

        transliterateFile(trans, encoding, inFile)
        return

    # NOTE(review): every path above returns, so the code below looks
    # unreachable; it is kept as it was -- confirm whether it is still wanted.
    trans = transliterate.Transliterate(
        translit_zawgyi.ZAWGYI_UNICODE_TRANSLITERATE)
    # New is not working yet. trans = Transliterate(ZAWGYI_UNICODE_TRANSLITERATE_2)

    trans.summary()

    testPhase1a(trans)
    testPhase1(trans)
    testPhase2(trans)
    biggerTest(trans)
    biggerTest2(trans)
    #return

    test1 = u'ေျခႀက'  # 1031 103b 1001 1080 1000
    result1 = trans.transliterate(test1)
    print('Output is %s' % result1)
    print('-------------\n')

    test2 = uStringsToText(u'\u1000\u1064')
    result2 = trans.transliterate(test2)
    print('Output 2 is %s' % result2)
    print('-------------\n')

    # biggerTest()
    return