def semiticListTransliterate(charlist, script2, fix=''):
    """Transliterate a comma-separated Latin character list into several scripts.

    Every transliteration result is normalised by turning the Arabic comma
    into an ASCII comma and is then split on commas, so each value of the
    returned dict is a list of strings.

    Args:
        charlist: Comma-separated string of Latin characters.
        script2: Name of the target script for the ``script2*`` entries.
        fix: Optional adjustment of the first ``script2R`` item:
            ``'alephA'`` replaces it with ``'ʾ'``, ``'inherentA'`` appends
            ``'a'`` to it. Any other value leaves it untouched.

    Returns:
        dict with the keys ``Latn``, ``script2``, ``script2R`` (round trip
        back to Latin), ``script2G`` / ``script2GR`` (via the ``Brah``
        intermediate), plus ``<script>`` and ``<script>R`` for every entry of
        ``GM.SemiticScripts`` other than ``Latn``, ``Arab-Ph`` and ``Arab-Ga``.
    """

    def as_items(text):
        # Arabic comma -> ASCII comma, then split into the individual items.
        return text.replace('،', ',').split(',')

    results = {'Latn': as_items(charlist)}

    results['script2'] = as_items(
        transliterate.process('Latn', script2, charlist, nativize=False))

    # Round trip: Latin -> script2 -> Latin.
    in_script2 = transliterate.process('Latn', script2, charlist,
                                       nativize=False)
    results['script2R'] = as_items(
        transliterate.process(script2, 'Latn', in_script2))

    # Same character list routed through the 'Brah' intermediate.
    in_brahmi = transliterate.process('Latn', 'Brah', charlist)
    results['script2G'] = as_items(
        transliterate.process('Brahmi', script2, in_brahmi))

    in_brahmi = transliterate.process('Latn', 'Brah', charlist)
    results['script2GR'] = as_items(
        transliterate.process('Brah', 'Latn', in_brahmi))

    skipped = ('Latn', 'Arab-Ph', 'Arab-Ga')
    for script in GM.SemiticScripts:
        if script in skipped:
            continue

        results[script] = as_items(
            transliterate.process('Latn', script, charlist, nativize=False))

        in_script = transliterate.process('Latn', script, charlist,
                                          nativize=False)
        results[script + 'R'] = as_items(
            transliterate.process(script, 'Latn', in_script, nativize=False))

    round_trip = results['script2R']
    if fix == 'alephA':
        round_trip[0] = 'ʾ'
    elif fix == 'inherentA':
        round_trip[0] = round_trip[0] + 'a'

    return results
Example #2
0
def convertNumerals(bnLine, enLine):
    """Swap the numerals of a Bengali/English line pair into the matching script.

    Runs of ASCII digits found in ``bnLine`` are replaced by their
    ``RomanReadable`` -> ``Bengali`` transliteration; runs of Bengali digits
    found in ``enLine`` are replaced by their ``Bengali`` -> ``RomanReadable``
    transliteration. Each match replaces the first remaining occurrence of
    that digit run.

    Returns:
        Tuple ``(converted bnLine, converted enLine)``.
    """
    convertedBn = bnLine
    asciiRuns = re.findall(r'[0-9]+', convertedBn, flags=re.UNICODE)
    for asciiDigits in asciiRuns:
        bengaliDigits = transliterate.process('RomanReadable', 'Bengali',
                                              asciiDigits)
        convertedBn = convertedBn.replace(asciiDigits, bengaliDigits, 1)

    convertedEn = enLine
    bengaliRuns = re.findall(r'[০-৯]+', convertedEn, flags=re.UNICODE)
    for bengaliDigits in bengaliRuns:
        asciiDigits = transliterate.process('Bengali', 'RomanReadable',
                                            bengaliDigits)
        convertedEn = convertedEn.replace(bengaliDigits, asciiDigits, 1)

    return convertedBn, convertedEn
Example #3
0
    def get(self, uid, target):
        """Return the texts stored under *uid*, transliterated from ISO to *target*.

        Runs the ``SUTTA_SINGLE_PALI_TEXT`` query with ``uid`` bound, decodes
        every value of the first result with ``json_load`` and transliterates
        each entry of the ``uid`` document with
        ``transliterate.process('ISO', target, ...)``.

        Args:
            uid: Identifier bound to the query and used as the key of the
                decoded result.
            target: Name of the script to transliterate into.

        Returns:
            The dict of transliterated entries, or
            ``({'error': 'Not Found'}, 404)`` when the query yields no
            (or an empty) result.
        """
        db = get_db()

        cursor = db.aql.execute(SUTTA_SINGLE_PALI_TEXT,
                                bind_vars={'uid': uid})
        # Supplying a default keeps an empty cursor from raising StopIteration,
        # so the "Not Found" branch below is actually reachable.
        result = next(cursor, None)
        if not result:
            return {'error': 'Not Found'}, 404

        sutta_texts = {k: json_load(v) for k, v in result.items()}
        segments = sutta_texts[uid]
        # Values are replaced in place; the key set does not change, so
        # iterating over items() while assigning is safe.
        for key, value in segments.items():
            segments[key] = transliterate.process('ISO', target, value)

        return segments
Example #4
0
def romanize_file(fname):
    """Transliterate every line of ``<fname>.txt`` from Telugu to ISO.

    The stripped input lines are printed as a list, then each line and its
    transliteration are printed in turn. The transliterations are written,
    one per line, to ``<fname>_transliterated.txt`` (UTF-8).

    Args:
        fname: Path of the input file without the ``.txt`` extension.

    Returns:
        List of transliterated lines, in input order.
    """
    with codecs.open(fname + ".txt", encoding='utf-8') as source:
        parsed = [line.strip() for line in source.readlines()]

    print(parsed)

    trans_list = []
    for word in parsed:
        romanized = transliterate.process('Telugu', 'ISO', word)
        print(word)
        print(romanized)
        trans_list.append(romanized)

    with io.open(fname + "_transliterated.txt", "w", encoding="utf-8") as sink:
        sink.writelines(padam + '\n' for padam in trans_list)

    return trans_list
Example #5
0
 def get(self, target, text):
     """Return *text* transliterated from IAST into the *target* script."""
     converted = transliterate.process('IAST', target, text)
     return converted
def main(argv):
    """Generate wiki verse pages from the first worksheet of a Google Sheet.

    Options read from *argv*:

    * ``-i`` / ``--InputFile``: Google Sheets file id, stored in the
      ``GOOGLE_SHEETS_FILE_ID`` global.
    * ``-b`` / ``--BookName``: book name, used as prefix of the output name.

    Both values are required: the usage line is printed and the process exits
    when either one is empty or when option parsing fails (exit status 2).

    Every worksheet row after the first one is processed. The text of
    ``row[slokaCol]`` is split into lines; each non-empty line is appended to
    the current page and its IAST transliteration to a parallel list.
    Whenever ``section`` differs from ``prevSection`` the accumulated page is
    handed to ``printFile`` and a new page is started; the last page is
    written after the loop.

    NOTE(review): ``section`` and ``chapter`` are never assigned in this
    function - presumably ``getChapterSectionVerse`` updates them as globals;
    confirm against that helper.
    """
    global GOOGLE_SHEETS_FILE_ID
    global prevSection
    global section
    global title
    global bookName
    try:
        opts, args = getopt.getopt(argv, "i:b:", ["InputFile=", "BookName="])
    except getopt.GetoptError:
        print('pg.py [-i] <GoogleSheetId> [-b <BookName>]')
        sys.exit(2)
    for opt, arg in opts:
        # getopt reports a long option under the name it was declared with,
        # so the declared "--InputFile" is what has to be matched here.
        if opt in ("-i", "--InputFile"):
            GOOGLE_SHEETS_FILE_ID = arg
        elif opt in ("-b", "--BookName"):
            bookName = arg
    if GOOGLE_SHEETS_FILE_ID == '' or bookName == '':
        print('pg.py [-i] <GoogleSheetId> [-b <BookName>]')
        sys.exit()
    google_client = pygsheets.authorize()
    spreadsheet = google_client.open_by_key(GOOGLE_SHEETS_FILE_ID)
    wks_list = spreadsheet.worksheets()
    wks = wks_list[0]  # only the first worksheet is used
    firstPageList = ''
    target = []  # lines of the page currently being built
    targetTransliterated = []  # IAST counterpart of the lines in target
    firstTime = True
    makeLists(
        wks
    )  # prepare the index tables for chapter pages and chapters, if any.
    prevSection = ''
    section = ''
    for row in [*wks][1:]:  # the first row is skipped
        verseNumber = row[verseNumberCol]
        if firstTime:
            prevSection = section
            target.append("{{-start-}}\n")
            if sectionTableList:  # only if sectionTableList was prepared, append it.
                target.append(
                    sectionTableList[0])  # index table of the sections
            # The title is not known until getChapterSectionVerse has run, so
            # a placeholder is written now and substituted further down.
            firstPageList += "<div style='text-align: center;float:left;width:100%;'>'''" + 'XXXXXXXXXX' + "'''</div>\n"  # title div
            firstPageList += "\n"
            firstPageList += '{{#widget:LanguageSelectorWidgetNew|textDiv=sloka|displayDiv=transliteration}}\n'  # insert the language selection dropdown
            firstPageList += '<div class="siva_container">\n'  # container div
            firstPageList += '<div id="sloka";name="sloka" class="siva_sutra">\n'  # div for sloka text for transliteration
            firstPageList += '{{ScriptureTranscludeSource500|\n'  # template
            targetTransliterated.append('{{ScriptureTransliterateSource500|\n'
                                        )  # start code for transliteration
        getChapterSectionVerse(verseNumber)
        if firstTime:
            firstTime = False
            firstPageList = firstPageList.replace(
                'XXXXXXXXXX',
                getTitle(row))  # update title now that we know it.
            target.append(firstPageList)
            # The first row can never trigger a section change.
            prevSection = section
        if (section != prevSection):
            # Section changed: close the current page, write it out and open
            # the next one.
            target.append("}}\n</div>\n"
                          )  # complete template ScriptureTranscludeSource500
            targetTransliterated.append(
                "}}\n</div>\n"
            )  # complete template ScriptureTransliteratedSource500
            # The transliterated lines are appended as one nested list element.
            target.append(targetTransliterated)  # add the transliterated table
            if (chapter == ''):
                # Guarded like the other sectionTableList[0] uses in this
                # function, so an empty table list does not raise IndexError.
                if sectionTableList:
                    target.append(
                        sectionTableList[0])  # index table of the sections
            else:
                target.append(
                    sectionTableList[int(chapter) -
                                     1])  # index table of the sections
            if (len(chapterList) > 2):
                target.append(chapterTable)  # index table of the chapters
            target.append("{{-stop-}}\n")
            printFile("verses/" + bookName + chapter + prevSection, target)
            prevSection = section
            target.clear()
            targetTransliterated.clear()
            target.append("{{-start-}}\n")
            if (chapter == ''):
                if sectionTableList:
                    target.append(
                        sectionTableList[0])  # index table of the sections
            else:
                target.append(
                    sectionTableList[int(chapter) -
                                     1])  # index table of the sections
            target.append(
                "<div style='text-align: center;float:left;width:100%;'>'''" +
                getTitle(row) + "'''</div>\n")  # title div
            target.append("\n")
            target.append(
                '{{#widget:LanguageSelectorWidgetNew|textDiv=sloka|displayDiv=transliteration}}\n'
            )  # insert the language selection dropdown
            target.append('<div class="siva_container">\n')
            target.append('<div id="sloka";name="sloka" class="siva_sutra">\n')
            target.append('{{ScriptureTranscludeSource500|\n')
            targetTransliterated.append('{{ScriptureTransliterateSource500|\n'
                                        )  # start code for transliteration
        lines = str(row[slokaCol]).splitlines()
        linesRead = 0  # number of lines of this cell handled so far
        linesToRead = len(lines)  # number of lines of text within the cell
        for e in lines:
            e = '{{ns}}' + e.lstrip().rstrip(
            )  # strip all leading and trailing white space
            if e.rstrip(
                    '|') == '':  # ignore empty lines or lines with only '|'
                continue
            # A line containing '||' gets the verse number and a separator.
            parts = e.rsplit('||', 2)
            if len(parts) > 1:
                appendSlokaNo = True
            else:
                appendSlokaNo = False
            # '|' would end a template argument, so it is replaced by {{!}}.
            e = e.replace("|", "{{!}}")
            if appendSlokaNo and verseNumber:
                e = e + verseNumber + "{{!}}{{!}}|\n"
            else:
                e = e + "|\n"
            if debug():
                print("line: ", e)
            if linesRead < linesToRead:
                transliteratedText = transliterate.process('autodetect',
                                                           'IAST',
                                                           e,
                                                           nativize=False)
                target.append(e)
                targetTransliterated.append(transliteratedText)
            if appendSlokaNo:
                target.append("----\n")
                targetTransliterated.append("----\n")
            linesRead += 1

    # Close and write the page that is still open after the last row.
    target.append(
        "}}\n</div>\n")  # complete template ScriptureTranscludeSource500
    targetTransliterated.append(
        "}}\n</div>\n")  # complete template ScriptureTransliteratedSource500
    target.append(targetTransliterated)
    if (chapter == ''):
        if sectionTableList:  # only if at least one sectionTableList was prepared, append it.
            target.append(sectionTableList[0])  # index table of the sections
    else:
        target.append(sectionTableList[int(chapter) -
                                       1])  # index table of the sections
    if (len(chapterList) > 2):
        target.append(chapterTable)  # index table of the chapters
    target.append("{{-stop-}}\n")
    printFile("verses/" + bookName + chapter + prevSection, target)
    target.clear()
    targetTransliterated.clear()
    debug()
Example #7
0
from aksharamukha import transliterate

# Harvard-Kyoto input rendered in Siddham.
siddham_text = transliterate.process('HK', 'Siddham', 'buddhaH')
print(siddham_text)

# Source script detected automatically, output in IAST.
meetei_iast = transliterate.process('autodetect', 'IAST', 'ꯃꯤꯇꯩ_ꯃꯌꯦꯛ')
print(meetei_iast)

# Fourth positional argument (False) passed explicitly.
tamil_text = transliterate.process('HK', 'Tamil', 'maMgaLa', False)
print(tamil_text)

# Same as above, with post-processing options applied to the Tamil output.
tamil_post_options = ['TamilSubScript', 'TamilRemoveApostrophe']
tamil_with_options = transliterate.process('HK',
                                           'Tamil',
                                           'bRhaspati gaMgA',
                                           False,
                                           post_options=tamil_post_options)
print(tamil_with_options)

# Thai input with a pre-processing option, rendered in Devanagari.
thai_pre_options = ['ThaiOrthography']
thai_devanagari = transliterate.process('Thai',
                                        'Devanagari',
                                        'พุทธัง สะระณัง คัจฉามิ',
                                        pre_options=thai_pre_options)
print(thai_devanagari)

# The same Thai text with automatic source detection.
thai_iast = transliterate.process('autodetect', 'IAST',
                                  'พุทธัง สะระณัง คัจฉามิ')
print(thai_iast)

# Show what the detector itself returns for the Meetei Mayek sample.
detected_script = transliterate.auto_detect('ꯃꯤꯇꯩ_ꯃꯌꯦꯛ')
print(detected_script)

print(
    transliterate.process(
        'Devanagari',
        'IAST',
        'धर्म भारत की श्रमण परम्परा से निकला धर्म और दर्शन है',
def describe_list_semitic():
    """Write the semitic syllabary JSON resource files.

    For every script in ``GM.SemiticScripts`` except ``Latn`` (``script1``)
    and for each of the guide scripts ``Latn`` and ``Type`` (``script2``), a
    file ``resources/semitic_syllabary/semitic_syllabary_<script1>_<script2>.json``
    is written. Each file holds five parallel lists:

    * ``script1``    - the characters in ``script1`` (vowels are prefixed
      with the ``script1`` rendering of ``m``),
    * ``script1R``   - those characters transliterated to ``script2``,
    * ``script2``    - the Latin keys transliterated to ``script2``,
    * ``script2R``   - the ``script2`` guides transliterated back to
      ``script1``,
    * ``scriptLatn`` - the Latin keys (vowels prefixed with ``m``).

    NOTE(review): ``GM`` is not imported inside this function (only
    ``GeneralMap`` is); presumably it is a module-level alias - confirm.
    """
    from main import get_semitic_json
    from aksharamukha import GeneralMap

    semitic_json = get_semitic_json()
    script_json = [script for script in GM.SemiticScripts if script != 'Latn']

    # These do not depend on the scripts being processed.
    vowels = GeneralMap.semiticVowelsAll
    vowelsInitial = GeneralMap.vowelsInitialAll

    for script1 in script_json:
        # Carrier consonant for vowels; depends on script1 only.
        m = transliterate.process('Latn', script1, 'm')

        for script2 in ['Latn', 'Type']:
            charsScript1 = []
            charsScript1R = []
            charsScript2 = []
            charsScript2R = []
            charsLatn = []

            for lat, char in semitic_json['ssub']['Latn'][script1].items():
                latOrig = lat
                if lat in vowels:
                    lat = 'm' + lat
                    char = m + char

                charsScript1.append(char)
                charguide = transliterate.process('Latn',
                                                  script2,
                                                  lat,
                                                  nativize=False)
                charsScript2.append(charguide)

                # Vowels, initial vowels and 'ˀâ' are reversed as they are;
                # everything else has vowel marks/diacritics stripped.
                keepMarks = (latOrig in vowels or latOrig in vowelsInitial
                             or latOrig == 'ˀâ')
                if keepMarks:
                    charguideReverse = transliterate.process(script2,
                                                             script1,
                                                             charguide,
                                                             nativize=False)
                else:
                    charguideReverse = transliterate.process(
                        script2,
                        script1,
                        charguide,
                        nativize=False,
                        post_options=[
                            'removeVowelsSyriac', 'removeDiacriticsArabic',
                            'ArabAtoAleph', ''
                        ]).replace('\u05B7', '').replace('\u07A6', '')

                charsScript2R.append(charguideReverse)

                charReverse = transliterate.process(script1,
                                                    script2,
                                                    char,
                                                    nativize=False)
                charsScript1R.append(charReverse)
                charsLatn.append(lat)

            results = {
                'script1': charsScript1,
                'script1R': charsScript1R,
                'script2': charsScript2,
                'script2R': charsScript2R,
                'scriptLatn': charsLatn,
            }
            print(script1, script2)

            out_path = ("resources/semitic_syllabary/semitic_syllabary_" +
                        script1 + "_" + script2 + ".json")
            # The context manager closes the file even if serialising or
            # writing raises.
            with io.open(out_path, mode="w", encoding="utf-8") as f:
                f.write(
                    json.dumps(results,
                               ensure_ascii=False,
                               sort_keys=True,
                               indent=4))
Example #9
0
def describe_list_semitic():
    """Return the semitic syllabary comparison of two scripts as JSON.

    ``script1`` and ``script2`` are read from the JSON request body.

    * When ``script2`` is ``'Type'`` or ``'Latn'`` the content of
      ``resources/semitic_syllabary/semitic_syllabary_<script1>_<script2>.json``
      is returned.
    * Otherwise the five parallel lists ``script1``, ``script1R``,
      ``script2``, ``script2R`` and ``scriptLatn`` are computed on the fly
      with ``transliterate.process``.

    Returns:
        The ``jsonify`` response for the result dict.
    """
    semitic_json = get_semitic_json()

    script1 = request.json['script1']
    script2 = request.json['script2']

    if script2 in ['Type', 'Latn']:
        # NOTE(review): script1 comes straight from the request body and is
        # interpolated into a file path; it should be validated against the
        # known script names before being used here.
        syllabary_path = ("resources/semitic_syllabary/semitic_syllabary_" +
                          script1 + "_" + script2 + ".json")
        # The context manager closes the file even if reading or parsing
        # raises.
        with open(syllabary_path, 'r', encoding='utf-8') as f:
            results_final = json.loads(f.read())

        return jsonify(results_final)

    charsScript1 = []
    charsScript1R = []
    charsScript2 = []
    charsScript2R = []
    charsLatn = []

    vowels = GeneralMap.semiticVowelsAll
    vowelsInitial = GeneralMap.vowelsInitialAll

    # Carrier consonant placed in front of vowels.
    m = transliterate.process('Latn', script1, 'm')
    for lat, char in semitic_json['ssub']['Latn'][script1].items():
        latOrig = lat
        if lat in vowels:
            lat = 'm' + lat
            char = m + char

        charsScript1.append(char)
        charguide = transliterate.process('Latn', script2, lat, nativize=False)
        charsScript2.append(charguide)

        # Vowels, initial vowels and 'ˀâ' are reversed as they are; everything
        # else has vowel marks/diacritics stripped.
        keepMarks = (latOrig in vowels or latOrig in vowelsInitial
                     or latOrig == 'ˀâ')
        if keepMarks:
            charguideReverse = transliterate.process(script2,
                                                     script1,
                                                     charguide,
                                                     nativize=False)
        else:
            charguideReverse = transliterate.process(
                script2,
                script1,
                charguide,
                nativize=False,
                post_options=[
                    'removeVowelsSyriac', 'removeDiacriticsArabic',
                    'ArabAtoAleph', ''
                ]).replace('\u05B7', '').replace('\u07A6', '')

        charsScript2R.append(charguideReverse)

        charReverse = transliterate.process(script1,
                                            script2,
                                            char,
                                            nativize=False)
        charsScript1R.append(charReverse)
        charsLatn.append(lat)

    results = {
        'script1': charsScript1,
        'script1R': charsScript1R,
        'script2': charsScript2,
        'script2R': charsScript2R,
        'scriptLatn': charsLatn,
    }

    return jsonify(results)
Example #10
0
def main(argv):
    """Build verse pages from a Google Sheet, a CSV file or an Excel workbook.

    Options read from *argv*:

    * ``-g`` / ``--GoogleSheetId``: Google Sheets file id.
    * ``-x`` / ``--Excel``: path of an Excel workbook (first sheet is read).
    * ``-c`` / ``--CsvFile``: path of a CSV file.
    * ``-s`` / ``--Skip``: integer stored in ``skipCount`` (default 1).
    * ``-b`` / ``--BookName``: book name. When it is still empty while a
      source option is handled, it is derived from that option's value: the
      text before the first ``.``, with spaces replaced by underscores.

    The usage line is printed and the process exits when option parsing
    fails (exit status 2) or when no source or no book name is available.

    For every processed row the text of ``row[slokaCol]`` is split into
    lines; each line is appended to the global ``target`` list and its IAST
    transliteration to the global ``targetTransliterated`` list.
    ``startNewPage`` is called when ``section`` changes, for
    ``StartNewPage`` marker lines of CSV input, and once after the loop.

    NOTE(review): ``section`` and ``chapter`` are not assigned inside the
    loop - presumably ``getChapterSectionVerse`` updates them; confirm.
    """
    global GOOGLE_SHEETS_FILE_ID
    global prevChapter
    global prevSection
    global chapter
    global section
    global title
    global bookName
    global xcelFile
    global csvFile
    global skipCount
    global target
    global targetTransliterated
    global newCsvPage
    bookName = ''
    xcelFile = ''
    csvFile = ''
    skipCount = 1

    try:
        opts, args = getopt.getopt(
            argv, ":g:c:x:b:s:",
            ["GoogleSheetId=", "CsvFile=", "Skip=", "BookName=", "Excel="])
    except getopt.GetoptError:
        print(
            'pg.py [ -g <GoogleSheetId> | -x <ExcelFile> | -c <CsvFile> ] [-s <SkipLines>] [-b <BookName>]'
        )
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-s", "--Skip"):
            skipCount = int(arg)
        if opt in ("-b", "--BookName"):
            bookName = arg
        # The three source options are mutually exclusive per option; the
        # file-based ones reset the Google Sheets id.
        if opt in ("-g", "--GoogleSheetId"):
            GOOGLE_SHEETS_FILE_ID = arg
            if bookName == '':
                bookName = (str(arg).split('.', 2)[0]).replace(
                    ' ',
                    '_')  # first part of the filename is taken as the bookname
        elif opt in ("-x", "--Excel"):
            GOOGLE_SHEETS_FILE_ID = ""
            xcelFile = arg
            if bookName == '':
                bookName = (str(arg).split('.', 2)[0]).replace(
                    ' ',
                    '_')  # first part of the filename is taken as the bookname
                #bookName = arg
        elif opt in ("-c", "--CsvFile"):
            GOOGLE_SHEETS_FILE_ID = ""
            csvFile = arg
            if bookName == '':
                bookName = (str(arg).split('.', 2)[0]).replace(
                    ' ',
                    '_')  # first part of the filename is taken as the bookname

    # NOTE(review): GOOGLE_SHEETS_FILE_ID is only assigned above when a source
    # option was given; presumably it has a module-level default - confirm.
    if (GOOGLE_SHEETS_FILE_ID == '' and xcelFile == ''
            and csvFile == '') or bookName == '':
        print(
            'pg.py [ -g <GoogleSheetId> | -x <ExcelFile> | -c <CsvFile> ] [-s <SkipLines>] [-b <BookName>]'
        )
        sys.exit()
    # Load the rows; precedence is Google Sheet, then CSV, then Excel.
    if GOOGLE_SHEETS_FILE_ID:
        google_client = pygsheets.authorize()
        spreadsheet = google_client.open_by_key(GOOGLE_SHEETS_FILE_ID)
        wks_list = spreadsheet.worksheets()
        wks = wks_list[0].get_all_values()
    elif csvFile:
        # pip3 install requests
        # NOTE(review): requests is imported but not used in this function.
        import requests
        df = pd.read_csv(csvFile, header=None, skiprows=None)
        # Three empty columns are prepended; after the inserts the order is
        # Chapter, Section, RowNumber, followed by the CSV columns.
        df.insert(0, 'RowNumber', '')
        df.insert(0, 'Section', '')
        df.insert(0, 'Chapter', '')
        wks = df.values
    elif xcelFile:
        # pip3 install openpyxl
        # NOTE(review): load_workbook is imported but not used in this function.
        from openpyxl import load_workbook
        wks = pd.read_excel(xcelFile, sheet_name=0, header=None,
                            skiprows=None).values
        #wks= df

    count = 0
    firstTime = True
    #wks = [*wks]
    makeLists(
        wks
    )  # prepare the index tables for chapter pages and chapters, if any.

    prevSection = ''
    prevChapter = ''
    section = ''
    firstPageList = ''
    target = []
    targetTransliterated = []
    newCsvPage = 1
    senp = 1
    # NOTE(review): with the default skipCount of 1 the slice start is -1, so
    # only the last row is processed; skipCount == 2 processes every row.
    # Confirm that this offset is intended.
    for row in [*wks][skipCount - 2:]:
        verseNumber = row[verseNumberCol]
        if firstTime:
            prevSection = section
            # NOTE(review): chapter is read here before getChapterSectionVerse
            # has been called in this function; it must already exist as a
            # global.
            prevChapter = chapter
            target.append("{{-start-}}\n")
            appendPageLists()
            # The title is not known yet; a placeholder is substituted below.
            firstPageList = startPageHeader('XXXXXXXXXX')
            targetTransliterated.append('{{ScriptureTransliterateSource500|\n'
                                        )  # start code for transliteration
        getChapterSectionVerse(verseNumber)
        if firstTime:
            firstTime = False
            firstPageList = firstPageList.replace(
                'XXXXXXXXXX',
                getTitle(row))  # update title now that we know it.
            target.append(firstPageList)
            # The first processed row can never trigger a section change.
            prevSection = section
        if (section != prevSection):
            startNewPage(row)
        lines = str(row[slokaCol]).splitlines()
        linesRead = 0  # number of lines of this cell handled so far
        linesToRead = len(lines)  # number of lines of text within the given cell
        for e in lines:
            e = '{{ns}}' + e.lstrip().rstrip(
            )  # strip all leading and trailing white space
            if e.rstrip('|') == '{{ns}}StartNewPage':
                # NOTE(review): the marker line is only skipped for CSV input;
                # for the other sources it falls through and is emitted like
                # any other line. Confirm that this is intended.
                if csvFile:
                    prevChapter = newCsvPage
                    newCsvPage += 1
                    startNewPage(
                        row)  # a StartNewPage marker in a csv file starts a new page
                    #else:
                    continue  # the marker line itself is not emitted
            # A line containing '||' gets the verse number and a separator.
            parts = e.rsplit('||', 2)
            if len(parts) > 1:
                appendSlokaNo = True
            else:
                appendSlokaNo = False
            # Every '|' is replaced by the {{!}} markup before the line is stored.
            e = e.replace("|", "{{!}}")
            if appendSlokaNo and verseNumber:
                e = e + verseNumber + "{{!}}{{!}}|\n"
            else:
                e = e + "|\n"
            if debug():
                print("line: ", e)
            if linesRead < linesToRead:
                transliteratedText = transliterate.process('autodetect',
                                                           'IAST',
                                                           e,
                                                           nativize=False)
                target.append(e)
                targetTransliterated.append(transliteratedText)
            if appendSlokaNo:
                target.append("----\n")
                targetTransliterated.append("----\n")
            linesRead += 1
    if csvFile:
        prevChapter = newCsvPage
        newCsvPage += 1
    # NOTE(review): row is the loop variable; if the loop body never ran this
    # raises NameError.
    startNewPage(row)
    debug()