def semiticListTransliterate(charlist, script2, fix=''):
    """Transliterate a comma-separated Latin character list into several scripts.

    Args:
        charlist: Comma-separated string of Latin ('Latn') characters.
        script2: Name of the second script to transliterate to and from.
        fix: Optional post-fix applied to the first ``'script2R'`` entry:
            ``'alephA'`` replaces it with ``'ʾ'``; ``'inherentA'`` appends
            ``'a'`` to it.  Any other value leaves the entry untouched.

    Returns:
        A dict of lists keyed by ``'Latn'``, ``'script2'``, ``'script2R'``,
        ``'script2G'``, ``'script2GR'`` and, for every entry of
        ``GM.SemiticScripts`` other than ``'Latn'``, ``'Arab-Ph'`` and
        ``'Arab-Ga'``, the script name and the script name suffixed with
        ``'R'`` (the round trip back to Latin).
    """

    def split_chars(text):
        # Transliteration output may contain the Arabic comma (U+060C);
        # normalise it so every list is split on the same delimiter.
        return text.replace('،', ',').split(',')

    results = {}
    results['Latn'] = split_chars(charlist)

    # Each forward transliteration is computed once and reused for the
    # round trip instead of being recomputed inside the reverse call.
    in_script2 = transliterate.process('Latn', script2, charlist,
                                       nativize=False)
    results['script2'] = split_chars(in_script2)
    results['script2R'] = split_chars(
        transliterate.process(script2, 'Latn', in_script2))

    # NOTE(review): the forward step targets 'Brah' while the next step reads
    # the result as 'Brahmi'; kept exactly as in the original -- confirm the
    # two names are meant to differ.
    in_brah = transliterate.process('Latn', 'Brah', charlist)
    results['script2G'] = split_chars(
        transliterate.process('Brahmi', script2, in_brah))
    results['script2GR'] = split_chars(
        transliterate.process('Brah', 'Latn', in_brah))

    for script in GM.SemiticScripts:
        if script in ('Latn', 'Arab-Ph', 'Arab-Ga'):
            continue
        forward = transliterate.process('Latn', script, charlist,
                                        nativize=False)
        results[script] = split_chars(forward)
        results[script + 'R'] = split_chars(
            transliterate.process(script, 'Latn', forward, nativize=False))

    if fix == 'alephA':
        results['script2R'][0] = 'ʾ'
    elif fix == 'inherentA':
        results['script2R'][0] += 'a'

    return results
def convertNumerals(bnLine, enLine):
    """Swap numerals so each line only uses the digits of its own script.

    Runs of ASCII digits found in *bnLine* are replaced by their
    'RomanReadable' -> 'Bengali' transliteration, and runs of Bengali digits
    found in *enLine* by their 'Bengali' -> 'RomanReadable' transliteration.

    Returns:
        A ``(bengali_line, english_line)`` tuple with the converted text.
    """
    converted_bn = bnLine
    # The runs are collected from the untouched input; every run then
    # replaces only its first remaining occurrence, in left-to-right order.
    for ascii_run in re.findall(r'[0-9]+', bnLine, flags=re.UNICODE):
        bengali_digits = transliterate.process('RomanReadable', 'Bengali',
                                               ascii_run)
        converted_bn = converted_bn.replace(ascii_run, bengali_digits, 1)

    converted_en = enLine
    for bengali_run in re.findall(r'[০-৯]+', enLine, flags=re.UNICODE):
        ascii_digits = transliterate.process('Bengali', 'RomanReadable',
                                             bengali_run)
        converted_en = converted_en.replace(bengali_run, ascii_digits, 1)

    return converted_bn, converted_en
def get(self, uid, target):
    """Return the Pali text stored for *uid*, transliterated into *target*.

    Runs the ``SUTTA_SINGLE_PALI_TEXT`` query with ``uid`` bound, decodes
    every value of the first result with ``json_load`` and transliterates
    each value of the ``uid`` entry from 'ISO' into ``target``.

    Returns:
        The transliterated mapping for ``uid``, or
        ``({'error': 'Not Found'}, 404)`` when the query yields nothing.
    """
    db = get_db()
    results = db.aql.execute(SUTTA_SINGLE_PALI_TEXT, bind_vars={'uid': uid})

    # A bare next() raises StopIteration on an empty cursor, which would
    # bypass the 404 branch below; the default makes "no rows" fall into it.
    result = next(results, None)
    if not result:
        return {'error': 'Not Found'}, 404

    sutta_texts = {k: json_load(v) for k, v in result.items()}
    segments = sutta_texts[uid]
    # Only existing keys are reassigned, so updating while iterating is safe.
    for key, value in segments.items():
        segments[key] = transliterate.process('ISO', target, value)
    return segments
def romanize_file(fname):
    """Transliterate every line of ``<fname>.txt`` from Telugu to ISO.

    The stripped input lines are printed, then each line and its
    transliteration are printed in turn.  The results are written one per
    line to ``<fname>_transliterated.txt`` and also returned.

    Args:
        fname: Path of the input file without its ``.txt`` extension.

    Returns:
        The list of transliterated lines, in input order.
    """
    with codecs.open(fname + ".txt", encoding='utf-8') as source:
        parsed = [raw_line.strip() for raw_line in source.readlines()]
    print(parsed)

    trans_list = []
    for word in parsed:
        romanized = transliterate.process('Telugu', 'ISO', word)
        print(word)
        print(romanized)
        trans_list.append(romanized)

    with io.open(fname + "_transliterated.txt", "w",
                 encoding="utf-8") as sink:
        sink.writelines(padam + '\n' for padam in trans_list)

    return trans_list
def get(self, target, text):
    """Return *text* transliterated from 'IAST' into the *target* script."""
    converted = transliterate.process('IAST', target, text)
    return converted
def main(argv):
    """Generate wiki page files for a book from a Google Sheet.

    Options read from *argv*:
        -i / --InputFile <GoogleSheetId>   id of the spreadsheet to open
        -b / --BookName  <BookName>        prefix of the generated page names

    The first worksheet is opened through pygsheets and every row after the
    first is processed.  The sloka cell of each row is split into lines; each
    line is appended to the page in its source form and, transliterated to
    IAST, to a parallel transliteration block.  A page is written with
    ``printFile`` whenever ``section`` changes and once more after the last
    row.

    Exits with status 2 on an unparsable command line; prints the usage and
    exits (status 0) when the sheet id or the book name is empty.
    """
    global GOOGLE_SHEETS_FILE_ID
    global prevSection
    global section
    global title
    global bookName

    try:
        opts, args = getopt.getopt(argv, "i:b:", ["InputFile=", "BookName="])
    except getopt.GetoptError:
        print('pg.py [-i] <GoogleSheetId> [-b <BookName>]')
        sys.exit(2)

    for opt, arg in opts:
        # getopt reports long options under their declared name, so the
        # handler must test "--InputFile" (the original tested "--ifile",
        # which getopt never produces, so the long form was ignored).
        if opt in ("-i", "--InputFile"):
            GOOGLE_SHEETS_FILE_ID = arg
        elif opt in ("-b", "--BookName"):
            bookName = arg

    if GOOGLE_SHEETS_FILE_ID == '' or bookName == '':
        print('pg.py [-i] <GoogleSheetId> [-b <BookName>]')
        sys.exit()

    google_client = pygsheets.authorize()
    spreadsheet = google_client.open_by_key(GOOGLE_SHEETS_FILE_ID)
    wks_list = spreadsheet.worksheets()
    dirname = path.dirname(__file__)
    wks = wks_list[0]
    count = 0
    firstPageList = ''
    target = []
    targetTransliterated = []
    firstTime = True

    makeLists(wks)  # prepare the index tables for chapter pages and chapters, if any.
    prevSection = ''
    section = ''

    for row in [*wks][1:]:
        verseNumber = row[verseNumberCol]

        if firstTime:
            # The title is not known until getChapterSectionVerse() has run,
            # so the first page header is built around a placeholder.
            prevSection = section
            target.append("{{-start-}}\n")
            if sectionTableList:  # only if sectionTableList was prepared, append it.
                target.append(sectionTableList[0])  # index table of the sections
            firstPageList += "<div style='text-align: center;float:left;width:100%;'>'''" + 'XXXXXXXXXX' + "'''</div>\n"  # title div
            firstPageList += "\n"
            firstPageList += '{{#widget:LanguageSelectorWidgetNew|textDiv=sloka|displayDiv=transliteration}}\n'  # language selection dropdown
            firstPageList += '<div class="siva_container">\n'
            firstPageList += '<div id="sloka";name="sloka" class="siva_sutra">\n'  # div for sloka text
            firstPageList += '{{ScriptureTranscludeSource500|\n'  # template
            targetTransliterated.append('{{ScriptureTransliterateSource500|\n')  # start of the transliteration block

        getChapterSectionVerse(verseNumber)

        if firstTime:
            firstTime = False
            firstPageList = firstPageList.replace('XXXXXXXXXX', getTitle(row))  # update title now that we know it.
            target.append(firstPageList)
            prevSection = section

        if section != prevSection:
            # Section changed: close and write the current page, then open
            # the next one.
            target.append("}}\n</div>\n")  # complete template ScriptureTranscludeSource500
            targetTransliterated.append("}}\n</div>\n")  # complete template ScriptureTransliterateSource500
            # The list object itself is appended as one element; printFile is
            # presumably able to handle the nesting -- TODO confirm.
            target.append(targetTransliterated)
            if chapter == '':
                # Guarded like the first-page and final-page branches so an
                # empty sectionTableList does not raise IndexError.
                if sectionTableList:
                    target.append(sectionTableList[0])  # index table of the sections
            else:
                target.append(sectionTableList[int(chapter) - 1])  # index table of the sections
            if len(chapterList) > 2:
                target.append(chapterTable)  # index table of the chapters
            target.append("{{-stop-}}\n")
            printFile("verses/" + bookName + chapter + prevSection, target)

            prevSection = section
            target.clear()
            targetTransliterated.clear()

            target.append("{{-start-}}\n")
            if chapter == '':
                if sectionTableList:
                    target.append(sectionTableList[0])  # index table of the sections
            else:
                target.append(sectionTableList[int(chapter) - 1])  # index table of the sections
            target.append("<div style='text-align: center;float:left;width:100%;'>'''" + getTitle(row) + "'''</div>\n")  # title div
            target.append("\n")
            target.append('{{#widget:LanguageSelectorWidgetNew|textDiv=sloka|displayDiv=transliteration}}\n')  # language selection dropdown
            target.append('<div class="siva_container">\n')
            target.append('<div id="sloka";name="sloka" class="siva_sutra">\n')
            target.append('{{ScriptureTranscludeSource500|\n')
            targetTransliterated.append('{{ScriptureTransliterateSource500|\n')  # start of the transliteration block

        lines = str(row[slokaCol]).splitlines()
        linesRead = 0  # number of lines of the cell processed so far
        linesToRead = len(lines)  # number of lines of text within the cell
        for e in lines:
            e = '{{ns}}' + e.lstrip().rstrip()  # strip all leading and trailing white space
            # NOTE(review): e always starts with '{{ns}}' here, so this
            # comparison with '' can never be true and empty lines are not
            # skipped; left unchanged because the intended output is unclear.
            if e.rstrip('|') == '':  # ignore empty lines or lines with only '|'
                continue

            # A '||' in the line marks the end of a sloka.
            parts = e.rsplit('||', 2)
            appendSlokaNo = len(parts) > 1

            e = e.replace("|", "{{!}}")
            if appendSlokaNo and verseNumber:
                e = e + verseNumber + "{{!}}{{!}}|\n"
            else:
                e = e + "|\n"
            if debug():
                print("line: ", e)

            if linesRead < linesToRead:
                transliteratedText = transliterate.process('autodetect', 'IAST', e, nativize=False)
                target.append(e)
                targetTransliterated.append(transliteratedText)
                if appendSlokaNo:
                    target.append("----\n")
                    targetTransliterated.append("----\n")
            linesRead += 1

    # Close and write the last page.
    target.append("}}\n</div>\n")  # complete template ScriptureTranscludeSource500
    targetTransliterated.append("}}\n</div>\n")  # complete template ScriptureTransliterateSource500
    target.append(targetTransliterated)
    if chapter == '':
        if sectionTableList:  # only if at least one sectionTableList was prepared, append it.
            target.append(sectionTableList[0])  # index table of the sections
    else:
        target.append(sectionTableList[int(chapter) - 1])  # index table of the sections
    if len(chapterList) > 2:
        target.append(chapterTable)  # index table of the chapters
    target.append("{{-stop-}}\n")
    printFile("verses/" + bookName + chapter + prevSection, target)
    target.clear()
    targetTransliterated.clear()
    debug()
from aksharamukha import transliterate print(transliterate.process('HK', 'Siddham', 'buddhaH')) print(transliterate.process('autodetect', 'IAST', 'ꯃꯤꯇꯩ_ꯃꯌꯦꯛ')) print(transliterate.process('HK', 'Tamil', 'maMgaLa', False)) print( transliterate.process( 'HK', 'Tamil', 'bRhaspati gaMgA', False, post_options=['TamilSubScript', 'TamilRemoveApostrophe'])) print( transliterate.process('Thai', 'Devanagari', 'พุทธัง สะระณัง คัจฉามิ', pre_options=['ThaiOrthography'])) print(transliterate.process('autodetect', 'IAST', 'พุทธัง สะระณัง คัจฉามิ')) print(transliterate.auto_detect('ꯃꯤꯇꯩ_ꯃꯌꯦꯛ')) print( transliterate.process( 'Devanagari', 'IAST', 'धर्म भारत की श्रमण परम्परा से निकला धर्म और दर्शन है',
def describe_list_semitic():
    """Pre-compute the Semitic syllabary tables and write them as JSON files.

    For every script in ``GM.SemiticScripts`` except 'Latn' (``script1``) and
    for each guide script in ``['Latn', 'Type']`` (``script2``), five parallel
    lists are built from ``semitic_json['ssub']['Latn'][script1]`` and written
    to ``resources/semitic_syllabary/semitic_syllabary_<script1>_<script2>.json``:

    * ``script1``    -- the characters in script1 (vowels prefixed with the
      script1 rendering of 'm')
    * ``script1R``   -- those characters transliterated to script2
    * ``script2``    -- the Latin keys transliterated to script2
    * ``script2R``   -- the script2 guide transliterated back to script1
    * ``scriptLatn`` -- the Latin keys (vowels prefixed with 'm')
    """
    from main import get_semitic_json
    from aksharamukha import GeneralMap

    semitic_json = get_semitic_json()
    script_json = [script for script in GM.SemiticScripts if script != 'Latn']

    # Loop-invariant lookups, hoisted out of the nested loops.
    vowels = GeneralMap.semiticVowelsAll
    vowelsInitial = GeneralMap.vowelsInitialAll

    for script1 in script_json:
        # Carrier consonant for vowels; depends on script1 only.
        m = transliterate.process('Latn', script1, 'm')

        for script2 in ['Latn', 'Type']:
            charsScript1 = []
            charsScript1R = []
            charsScript2 = []
            charsScript2R = []
            charsLatn = []

            for lat, char in semitic_json['ssub']['Latn'][script1].items():
                latOrig = lat
                if lat in vowels:
                    lat = 'm' + lat
                    char = m + char
                charsScript1.append(char)

                charguide = transliterate.process('Latn', script2, lat,
                                                  nativize=False)
                charsScript2.append(charguide)

                # Vowels, initial vowels and 'ˀâ' are reversed as-is; for all
                # other entries the vowel/diacritic post-options are applied
                # and two specific marks are removed from the result.
                if (latOrig in vowels or latOrig in vowelsInitial
                        or latOrig == 'ˀâ'):
                    charguideReverse = transliterate.process(
                        script2, script1, charguide, nativize=False)
                else:
                    charguideReverse = transliterate.process(
                        script2, script1, charguide, nativize=False,
                        post_options=['removeVowelsSyriac',
                                      'removeDiacriticsArabic',
                                      'ArabAtoAleph', ''])\
                        .replace('\u05B7', '').replace('\u07A6', '')
                charsScript2R.append(charguideReverse)

                charReverse = transliterate.process(script1, script2, char,
                                                    nativize=False)
                charsScript1R.append(charReverse)
                charsLatn.append(lat)

            results = {
                'script1': charsScript1,
                'script1R': charsScript1R,
                'script2': charsScript2,
                'script2R': charsScript2R,
                'scriptLatn': charsLatn,
            }

            print(script1, script2)

            # The context manager closes the file even if serialisation or
            # the write raises (the original leaked the handle in that case).
            with io.open("resources/semitic_syllabary/semitic_syllabary_" +
                         script1 + "_" + script2 + ".json",
                         mode="w", encoding="utf-8") as f:
                f.write(
                    json.dumps(results, ensure_ascii=False, sort_keys=True,
                               indent=4))
def describe_list_semitic():
    """Return the Semitic syllabary table for the requested script pair as JSON.

    Reads ``script1`` and ``script2`` from the JSON request body.  When
    ``script2`` is 'Type' or 'Latn' the table is served from the JSON file
    ``resources/semitic_syllabary/semitic_syllabary_<script1>_<script2>.json``;
    otherwise it is computed from ``get_semitic_json()``.

    Returns:
        A JSON response with the keys ``script1``, ``script1R``, ``script2``,
        ``script2R`` and ``scriptLatn``, or a 400 JSON error when ``script1``
        contains a path separator.
    """
    script1 = request.json['script1']
    script2 = request.json['script2']

    if script2 in ['Type', 'Latn']:
        # script1 comes from the request body and is spliced into a file
        # path; refuse anything that could point outside the resources
        # directory.
        if '/' in script1 or '\\' in script1:
            return jsonify({'error': 'Invalid script name'}), 400

        # The context manager closes the file even if parsing fails.
        with open("resources/semitic_syllabary/semitic_syllabary_" + script1 +
                  "_" + script2 + ".json", 'r', encoding='utf-8') as f:
            results_final = json.load(f)
        return jsonify(results_final)

    # Only needed on the computed path, so it is loaded after the early
    # return above.
    semitic_json = get_semitic_json()

    charsScript1 = []
    charsScript1R = []
    charsScript2 = []
    charsScript2R = []
    charsLatn = []

    vowels = GeneralMap.semiticVowelsAll
    vowelsInitial = GeneralMap.vowelsInitialAll

    # Carrier consonant prefixed to vowels.
    m = transliterate.process('Latn', script1, 'm')

    for lat, char in semitic_json['ssub']['Latn'][script1].items():
        latOrig = lat
        if lat in vowels:
            lat = 'm' + lat
            char = m + char
        charsScript1.append(char)

        charguide = transliterate.process('Latn', script2, lat,
                                          nativize=False)
        charsScript2.append(charguide)

        # Vowels, initial vowels and 'ˀâ' are reversed as-is; for all other
        # entries the vowel/diacritic post-options are applied and two
        # specific marks are removed from the result.
        if (latOrig in vowels or latOrig in vowelsInitial
                or latOrig == 'ˀâ'):
            charguideReverse = transliterate.process(
                script2, script1, charguide, nativize=False)
        else:
            charguideReverse = transliterate.process(
                script2, script1, charguide, nativize=False,
                post_options=['removeVowelsSyriac', 'removeDiacriticsArabic',
                              'ArabAtoAleph', ''])\
                .replace('\u05B7', '').replace('\u07A6', '')
        charsScript2R.append(charguideReverse)

        charReverse = transliterate.process(script1, script2, char,
                                            nativize=False)
        charsScript1R.append(charReverse)
        charsLatn.append(lat)

    results = {
        'script1': charsScript1,
        'script1R': charsScript1R,
        'script2': charsScript2,
        'script2R': charsScript2R,
        'scriptLatn': charsLatn,
    }
    return jsonify(results)
def main(argv):
    """Generate wiki page files for a book from a Google Sheet, CSV or Excel file.

    Options read from *argv*:
        -g / --GoogleSheetId <id>    read the first worksheet of a Google Sheet
        -x / --Excel <file>          read the first sheet of an Excel workbook
        -c / --CsvFile <file>        read a CSV file
        -s / --Skip <n>              row offset; processing starts at row
                                     index ``n - 2`` (never below 0)
        -b / --BookName <name>       prefix of the generated page names; when
                                     omitted, the part of the source argument
                                     before the first '.' is used, with spaces
                                     replaced by '_'

    Each processed row's sloka cell is split into lines; every line is
    appended to ``target`` in source form and, transliterated to IAST, to
    ``targetTransliterated``.  Pages are started and flushed through
    ``startNewPage``.

    Exits with status 2 on an unparsable command line or a non-integer
    ``-s`` value; prints the usage and exits (status 0) when no input source
    or no book name is available.
    """
    global GOOGLE_SHEETS_FILE_ID
    global prevChapter
    global prevSection
    global chapter
    global section
    global title
    global bookName
    global xcelFile
    global csvFile
    global skipCount
    global target
    global targetTransliterated
    global newCsvPage

    usage = 'pg.py [ -g <GoogleSheetId> | -x <ExcelFile> | -c <CsvFile> ] [-s <SkipLines>] [-b <BookName>]'

    bookName = ''
    xcelFile = ''
    csvFile = ''
    skipCount = 1

    try:
        opts, args = getopt.getopt(
            argv, ":g:c:x:b:s:",
            ["GoogleSheetId=", "CsvFile=", "Skip=", "BookName=", "Excel="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-s", "--Skip"):
            # A non-numeric value used to escape as an uncaught ValueError;
            # treat it like any other command-line error.
            try:
                skipCount = int(arg)
            except ValueError:
                print(usage)
                sys.exit(2)
        if opt in ("-b", "--BookName"):
            bookName = arg
        if opt in ("-g", "--GoogleSheetId"):
            GOOGLE_SHEETS_FILE_ID = arg
            if bookName == '':
                # first part of the argument is taken as the book name
                bookName = (str(arg).split('.', 2)[0]).replace(' ', '_')
        elif opt in ("-x", "--Excel"):
            GOOGLE_SHEETS_FILE_ID = ""
            xcelFile = arg
            if bookName == '':
                # first part of the filename is taken as the book name
                bookName = (str(arg).split('.', 2)[0]).replace(' ', '_')
        elif opt in ("-c", "--CsvFile"):
            GOOGLE_SHEETS_FILE_ID = ""
            csvFile = arg
            if bookName == '':
                # first part of the filename is taken as the book name
                bookName = (str(arg).split('.', 2)[0]).replace(' ', '_')

    if (GOOGLE_SHEETS_FILE_ID == '' and xcelFile == ''
            and csvFile == '') or bookName == '':
        print(usage)
        sys.exit()

    if GOOGLE_SHEETS_FILE_ID:
        google_client = pygsheets.authorize()
        spreadsheet = google_client.open_by_key(GOOGLE_SHEETS_FILE_ID)
        wks_list = spreadsheet.worksheets()
        wks = wks_list[0].get_all_values()
    elif csvFile:
        # pip3 install requests
        import requests
        df = pd.read_csv(csvFile, header=None, skiprows=None)
        # Three empty leading columns are inserted; presumably so the CSV
        # columns line up with the sheet layout -- TODO confirm.
        df.insert(0, 'RowNumber', '')
        df.insert(0, 'Section', '')
        df.insert(0, 'Chapter', '')
        wks = df.values
    elif xcelFile:
        # pip3 install openpyxl
        from openpyxl import load_workbook
        wks = pd.read_excel(xcelFile, sheet_name=0, header=None,
                            skiprows=None).values

    count = 0
    firstTime = True

    makeLists(wks)  # prepare the index tables for chapter pages and chapters, if any.
    prevSection = ''
    prevChapter = ''
    section = ''
    firstPageList = ''
    target = []
    targetTransliterated = []
    newCsvPage = 1
    senp = 1

    # With the default skipCount of 1 the original start index was -1, which
    # silently restricted processing to the last row; clamp it to 0.
    # NOTE(review): the intended meaning of -s is not visible here -- confirm.
    firstRow = max(skipCount - 2, 0)

    for row in [*wks][firstRow:]:
        verseNumber = row[verseNumberCol]

        if firstTime:
            # The title is not known until getChapterSectionVerse() has run,
            # so the first page header is built around a placeholder.
            prevSection = section
            prevChapter = chapter
            target.append("{{-start-}}\n")
            appendPageLists()
            firstPageList = startPageHeader('XXXXXXXXXX')
            targetTransliterated.append('{{ScriptureTransliterateSource500|\n')  # start of the transliteration block

        getChapterSectionVerse(verseNumber)

        if firstTime:
            firstTime = False
            firstPageList = firstPageList.replace('XXXXXXXXXX', getTitle(row))  # update title now that we know it.
            target.append(firstPageList)
            prevSection = section

        if section != prevSection:
            startNewPage(row)

        lines = str(row[slokaCol]).splitlines()
        linesRead = 0  # number of lines of the cell processed so far
        linesToRead = len(lines)  # number of lines of text within the cell
        for e in lines:
            e = '{{ns}}' + e.lstrip().rstrip()  # strip all leading and trailing white space
            if e.rstrip('|') == '{{ns}}StartNewPage':
                if csvFile:
                    # a StartNewPage marker in a csv file starts a new page
                    prevChapter = newCsvPage
                    newCsvPage += 1
                    startNewPage(row)
                # The original carried a disabled "else: continue" here, so
                # the marker line is still emitted like any other line.

            # A '||' in the line marks the end of a sloka.
            parts = e.rsplit('||', 2)
            appendSlokaNo = len(parts) > 1

            e = e.replace("|", "{{!}}")
            if appendSlokaNo and verseNumber:
                e = e + verseNumber + "{{!}}{{!}}|\n"
            else:
                e = e + "|\n"
            if debug():
                print("line: ", e)

            if linesRead < linesToRead:
                transliteratedText = transliterate.process('autodetect', 'IAST', e, nativize=False)
                target.append(e)
                targetTransliterated.append(transliteratedText)
                if appendSlokaNo:
                    target.append("----\n")
                    targetTransliterated.append("----\n")
            linesRead += 1

    if csvFile:
        prevChapter = newCsvPage
        newCsvPage += 1
    # Final call after the loop; presumably flushes the last page -- TODO
    # confirm against startNewPage().
    startNewPage(row)
    debug()